Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  SLUB: Fix per-cpu merge conflict
  failslab: add ability to filter slab caches
  slab: fix regression in touched logic
  dma kmalloc handling fixes
  slub: remove impossible condition
  slab: initialize unused alien cache entry as NULL at alloc_alien_cache().
  SLUB: Make slub statistics use this_cpu_inc
  SLUB: this_cpu: Remove slub kmem_cache fields
  SLUB: Get rid of dynamic DMA kmalloc cache allocation
  SLUB: Use this_cpu operations in slub
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
index 2c558cd..f4dc9de 100644
--- a/Documentation/cdrom/ide-cd
+++ b/Documentation/cdrom/ide-cd
@@ -159,42 +159,7 @@
 to change.  If the slot number is -1, the drive is unloaded.
 
 
-4. Compilation options
-----------------------
-
-There are a few additional options which can be set when compiling the
-driver.  Most people should not need to mess with any of these; they
-are listed here simply for completeness.  A compilation option can be
-enabled by adding a line of the form `#define <option> 1' to the top
-of ide-cd.c.  All these options are disabled by default.
-
-VERBOSE_IDE_CD_ERRORS
-  If this is set, ATAPI error codes will be translated into textual
-  descriptions.  In addition, a dump is made of the command which
-  provoked the error.  This is off by default to save the memory used
-  by the (somewhat long) table of error descriptions.  
-
-STANDARD_ATAPI
-  If this is set, the code needed to deal with certain drives which do
-  not properly implement the ATAPI spec will be disabled.  If you know
-  your drive implements ATAPI properly, you can turn this on to get a
-  slightly smaller kernel.
-
-NO_DOOR_LOCKING
-  If this is set, the driver will never attempt to lock the door of
-  the drive.
-
-CDROM_NBLOCKS_BUFFER
-  This sets the size of the buffer to be used for a CDROMREADAUDIO
-  ioctl.  The default is 8.
-
-TEST
-  This currently enables an additional ioctl which enables a user-mode
-  program to execute an arbitrary packet command.  See the source for
-  details.  This should be left off unless you know what you're doing.
-
-
-5. Common problems
+4. Common problems
 ------------------
 
 This section discusses some common problems encountered when trying to
@@ -371,7 +336,7 @@
     expense of low system performance.
 
 
-6. cdchange.c
+5. cdchange.c
 -------------
 
 /*
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 73ef30d..31575e22 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -117,19 +117,25 @@
 ---------------------------
 
 What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
-When:	November 2005
+When:	2.6.35/2.6.36
 Files:	drivers/pcmcia/: pcmcia_ioctl.c
 Why:	With the 16-bit PCMCIA subsystem now behaving (almost) like a
 	normal hotpluggable bus, and with it using the default kernel
 	infrastructure (hotplug, driver core, sysfs) keeping the PCMCIA
 	control ioctl needed by cardmgr and cardctl from pcmcia-cs is
-	unnecessary, and makes further cleanups and integration of the
+	unnecessary and potentially harmful (it does not provide for
+	proper locking), and makes further cleanups and integration of the
 	PCMCIA subsystem into the Linux kernel device driver model more
 	difficult. The features provided by cardmgr and cardctl are either
 	handled by the kernel itself now or are available in the new
 	pcmciautils package available at
 	http://kernel.org/pub/linux/utils/kernel/pcmcia/
-Who:	Dominik Brodowski <linux@brodo.de>
+
+	For all architectures except ARM, the associated config symbol
+	has been removed from kernel 2.6.34; for ARM, it will be likely
+	be removed from kernel 2.6.35. The actual code will then likely
+	be removed from kernel 2.6.36.
+Who:	Dominik Brodowski <linux@dominikbrodowski.net>
 
 ---------------------------
 
@@ -550,3 +556,35 @@
 	NCCI TTY device nodes. User space (pppdcapiplugin) works without
 	noticing the difference.
 Who:	Jan Kiszka <jan.kiszka@web.de>
+
+----------------------------
+
+What:	KVM memory aliases support
+When:	July 2010
+Why:	Memory aliasing support is used for speeding up guest vga access
+	through the vga windows.
+
+	Modern userspace no longer uses this feature, so it's just bitrotted
+	code and can be removed with no impact.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:	KVM kernel-allocated memory slots
+When:	July 2010
+Why:	Since 2.6.25, kvm supports user-allocated memory slots, which are
+	much more flexible than kernel-allocated slots.  All current userspace
+	supports the newer interface and this code can be removed with no
+	impact.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:	KVM paravirt mmu host support
+When:	January 2011
+Why:	The paravirt mmu host support is slower than non-paravirt mmu, both
+	on newer and older hardware.  It is already not exposed to the guest,
+	and kept only for live migration purposes.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 18b9d0c..06bbbed 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -460,13 +460,6 @@
 
 --------------------------- dquot_operations -------------------------------
 prototypes:
-	int (*initialize) (struct inode *, int);
-	int (*drop) (struct inode *);
-	int (*alloc_space) (struct inode *, qsize_t, int);
-	int (*alloc_inode) (const struct inode *, unsigned long);
-	int (*free_space) (struct inode *, qsize_t);
-	int (*free_inode) (const struct inode *, unsigned long);
-	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);
 	int (*acquire_dquot) (struct dquot *);
 	int (*release_dquot) (struct dquot *);
@@ -479,13 +472,6 @@
 What filesystem should expect from the generic quota functions:
 
 		FS recursion	Held locks when called
-initialize:	yes		maybe dqonoff_sem
-drop:		yes		-
-alloc_space:	->mark_dirty()	-
-alloc_inode:	->mark_dirty()	-
-free_space:	->mark_dirty()	-
-free_inode:	->mark_dirty()	-
-transfer:	yes		-
 write_dquot:	yes		dqonoff_sem or dqptr_sem
 acquire_dquot:	yes		dqonoff_sem or dqptr_sem
 release_dquot:	yes		dqonoff_sem or dqptr_sem
@@ -495,10 +481,6 @@
 FS recursion means calling ->quota_read() and ->quota_write() from superblock
 operations.
 
-->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called
-only directly by the filesystem and do not call any fs functions only
-the ->mark_dirty() operation.
-
 More details about quota locking can be found in fs/dquot.c.
 
 --------------------------- vm_operations_struct -----------------------------
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 23a1810..fc0e39a 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -837,6 +837,9 @@
 	 individual lists does not affect propagation or the way propagation
 	 tree is modified by operations.
 
+	All vfsmounts in a peer group have the same ->mnt_master.  If it is
+	non-NULL, they form a contiguous (ordered) segment of slave list.
+
 	A example propagation tree looks as shown in the figure below.
 	[ NOTE: Though it looks like a forest, if we consider all the shared
 	mounts as a conceptual entity called 'pnode', it becomes a tree]
@@ -874,8 +877,19 @@
 
 	NOTE: The propagation tree is orthogonal to the mount tree.
 
+8B Locking:
 
-8B Algorithm:
+	->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected
+	by namespace_sem (exclusive for modifications, shared for reading).
+
+	Normally we have ->mnt_flags modifications serialized by vfsmount_lock.
+	There are two exceptions: do_add_mount() and clone_mnt().
+	The former modifies a vfsmount that has not been visible in any shared
+	data structures yet.
+	The latter holds namespace_sem and the only references to vfsmount
+	are in lists that can't be traversed without namespace_sem.
+
+8C Algorithm:
 
 	The crux of the implementation resides in rbind/move operation.
 
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 053037a..2f9115c 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -1,6 +1,7 @@
 Title	: Kernel Probes (Kprobes)
 Authors	: Jim Keniston <jkenisto@us.ibm.com>
-	: Prasanna S Panchamukhi <prasanna@in.ibm.com>
+	: Prasanna S Panchamukhi <prasanna.panchamukhi@gmail.com>
+	: Masami Hiramatsu <mhiramat@redhat.com>
 
 CONTENTS
 
@@ -15,6 +16,7 @@
 9. Jprobes Example
 10. Kretprobes Example
 Appendix A: The kprobes debugfs interface
+Appendix B: The kprobes sysctl interface
 
 1. Concepts: Kprobes, Jprobes, Return Probes
 
@@ -42,13 +44,13 @@
 can speed up unregistration process when you have to unregister
 a lot of probes at once.
 
-The next three subsections explain how the different types of
-probes work.  They explain certain things that you'll need to
-know in order to make the best use of Kprobes -- e.g., the
-difference between a pre_handler and a post_handler, and how
-to use the maxactive and nmissed fields of a kretprobe.  But
-if you're in a hurry to start using Kprobes, you can skip ahead
-to section 2.
+The next four subsections explain how the different types of
+probes work and how jump optimization works.  They explain certain
+things that you'll need to know in order to make the best use of
+Kprobes -- e.g., the difference between a pre_handler and
+a post_handler, and how to use the maxactive and nmissed fields of
+a kretprobe.  But if you're in a hurry to start using Kprobes, you
+can skip ahead to section 2.
 
 1.1 How Does a Kprobe Work?
 
@@ -161,13 +163,125 @@
 object available, then in addition to incrementing the nmissed count,
 the user entry_handler invocation is also skipped.
 
+1.4 How Does Jump Optimization Work?
+
+If you configured your kernel with CONFIG_OPTPROBES=y (currently
+this option is supported on x86/x86-64, non-preemptive kernel) and
+the "debug.kprobes_optimization" kernel parameter is set to 1 (see
+sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
+instruction instead of a breakpoint instruction at each probepoint.
+
+1.4.1 Init a Kprobe
+
+When a probe is registered, before attempting this optimization,
+Kprobes inserts an ordinary, breakpoint-based kprobe at the specified
+address. So, even if it's not possible to optimize this particular
+probepoint, there'll be a probe there.
+
+1.4.2 Safety Check
+
+Before optimizing a probe, Kprobes performs the following safety checks:
+
+- Kprobes verifies that the region that will be replaced by the jump
+instruction (the "optimized region") lies entirely within one function.
+(A jump instruction is multiple bytes, and so may overlay multiple
+instructions.)
+
+- Kprobes analyzes the entire function and verifies that there is no
+jump into the optimized region.  Specifically:
+  - the function contains no indirect jump;
+  - the function contains no instruction that causes an exception (since
+  the fixup code triggered by the exception could jump back into the
+  optimized region -- Kprobes checks the exception tables to verify this);
+  and
+  - there is no near jump to the optimized region (other than to the first
+  byte).
+
+- For each instruction in the optimized region, Kprobes verifies that
+the instruction can be executed out of line.
+
+1.4.3 Preparing Detour Buffer
+
+Next, Kprobes prepares a "detour" buffer, which contains the following
+instruction sequence:
+- code to push the CPU's registers (emulating a breakpoint trap)
+- a call to the trampoline code which calls user's probe handlers.
+- code to restore registers
+- the instructions from the optimized region
+- a jump back to the original execution path.
+
+1.4.4 Pre-optimization
+
+After preparing the detour buffer, Kprobes verifies that none of the
+following situations exist:
+- The probe has either a break_handler (i.e., it's a jprobe) or a
+post_handler.
+- Other instructions in the optimized region are probed.
+- The probe is disabled.
+In any of the above cases, Kprobes won't start optimizing the probe.
+Since these are temporary situations, Kprobes tries to start
+optimizing it again if the situation is changed.
+
+If the kprobe can be optimized, Kprobes enqueues the kprobe to an
+optimizing list, and kicks the kprobe-optimizer workqueue to optimize
+it.  If the to-be-optimized probepoint is hit before being optimized,
+Kprobes returns control to the original instruction path by setting
+the CPU's instruction pointer to the copied code in the detour buffer
+-- thus at least avoiding the single-step.
+
+1.4.5 Optimization
+
+The Kprobe-optimizer doesn't insert the jump instruction immediately;
+rather, it calls synchronize_sched() for safety first, because it's
+possible for a CPU to be interrupted in the middle of executing the
+optimized region(*).  As you know, synchronize_sched() can ensure
+that all interruptions that were active when synchronize_sched()
+was called are done, but only if CONFIG_PREEMPT=n.  So, this version
+of kprobe optimization supports only kernels with CONFIG_PREEMPT=n.(**)
+
+After that, the Kprobe-optimizer calls stop_machine() to replace
+the optimized region with a jump instruction to the detour buffer,
+using text_poke_smp().
+
+1.4.6 Unoptimization
+
+When an optimized kprobe is unregistered, disabled, or blocked by
+another kprobe, it will be unoptimized.  If this happens before
+the optimization is complete, the kprobe is just dequeued from the
+optimized list.  If the optimization has been done, the jump is
+replaced with the original code (except for an int3 breakpoint in
+the first byte) by using text_poke_smp().
+
+(*)Please imagine that the 2nd instruction is interrupted and then
+the optimizer replaces the 2nd instruction with the jump *address*
+while the interrupt handler is running. When the interrupt
+returns to original address, there is no valid instruction,
+and it causes an unexpected result.
+
+(**)This optimization-safety checking may be replaced with the
+stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
+kernel.
+
+NOTE for geeks:
+The jump optimization changes the kprobe's pre_handler behavior.
+Without optimization, the pre_handler can change the kernel's execution
+path by changing regs->ip and returning 1.  However, when the probe
+is optimized, that modification is ignored.  Thus, if you want to
+tweak the kernel's execution path, you need to suppress optimization,
+using one of the following techniques:
+- Specify an empty function for the kprobe's post_handler or break_handler.
+ or
+- Config CONFIG_OPTPROBES=n.
+ or
+- Execute 'sysctl -w debug.kprobes_optimization=n'
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
 architectures:
 
-- i386
-- x86_64 (AMD-64, EM64T)
+- i386 (Supports jump optimization)
+- x86_64 (AMD-64, EM64T) (Supports jump optimization)
 - ppc64
 - ia64 (Does not support probes on instruction slot1.)
 - sparc64 (Return probes not yet implemented.)
@@ -193,6 +307,10 @@
 so you can use "objdump -d -l vmlinux" to see the source-to-object
 code mapping.
 
+If you want to reduce probing overhead, set "Kprobes jump optimization
+support" (CONFIG_OPTPROBES) to "y". You can find this option under the
+"Kprobes" line.
+
 4. API Reference
 
 The Kprobes API includes a "register" function and an "unregister"
@@ -389,7 +507,10 @@
 
 Kprobes allows multiple probes at the same address.  Currently,
 however, there cannot be multiple jprobes on the same function at
-the same time.
+the same time.  Also, a probepoint for which there is a jprobe or
+a post_handler cannot be optimized.  So if you install a jprobe,
+or a kprobe with a post_handler, at an optimized probepoint, the
+probepoint will be unoptimized automatically.
 
 In general, you can install a probe anywhere in the kernel.
 In particular, you can probe interrupt handlers.  Known exceptions
@@ -453,6 +574,38 @@
 on the x86_64 version of __switch_to(); the registration functions
 return -EINVAL.
 
+On x86/x86-64, since the Jump Optimization of Kprobes modifies
+instructions widely, there are some limitations to optimization. To
+explain it, we introduce some terminology. Imagine a 3-instruction
+sequence consisting of a two 2-byte instructions and one 3-byte
+instruction.
+
+        IA
+         |
+[-2][-1][0][1][2][3][4][5][6][7]
+        [ins1][ins2][  ins3 ]
+	[<-     DCR       ->]
+	   [<- JTPR ->]
+
+ins1: 1st Instruction
+ins2: 2nd Instruction
+ins3: 3rd Instruction
+IA:  Insertion Address
+JTPR: Jump Target Prohibition Region
+DCR: Detoured Code Region
+
+The instructions in DCR are copied to the out-of-line buffer
+of the kprobe, because the bytes in DCR are replaced by
+a 5-byte jump instruction. So there are several limitations.
+
+a) The instructions in DCR must be relocatable.
+b) The instructions in DCR must not include a call instruction.
+c) JTPR must not be targeted by any jump or call instruction.
+d) DCR must not straddle the border betweeen functions.
+
+Anyway, these limitations are checked by the in-kernel instruction
+decoder, so you don't need to worry about that.
+
 6. Probe Overhead
 
 On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
@@ -476,6 +629,19 @@
 ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
 k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
 
+6.1 Optimized Probe Overhead
+
+Typically, an optimized kprobe hit takes 0.07 to 0.1 microseconds to
+process. Here are sample overhead figures (in usec) for x86 architectures.
+k = unoptimized kprobe, b = boosted (single-step skipped), o = optimized kprobe,
+r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe.
+
+i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+k = 0.80 usec; b = 0.33; o = 0.05; r = 1.10; rb = 0.61; ro = 0.33
+
+x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+k = 0.99 usec; b = 0.43; o = 0.06; r = 1.24; rb = 0.68; ro = 0.30
+
 7. TODO
 
 a. SystemTap (http://sourceware.org/systemtap): Provides a simplified
@@ -523,7 +689,8 @@
 a virtual address that is no longer valid (module init sections, module
 virtual addresses that correspond to modules that've been unloaded),
 such probes are marked with [GONE]. If the probe is temporarily disabled,
-such probes are marked with [DISABLED].
+such probes are marked with [DISABLED]. If the probe is optimized, it is
+marked with [OPTIMIZED].
 
 /sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
 
@@ -533,3 +700,19 @@
 file. Note that this knob just disarms and arms all kprobes and doesn't
 change each probe's disabling state. This means that disabled kprobes (marked
 [DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
+
+
+Appendix B: The kprobes sysctl interface
+
+/proc/sys/debug/kprobes-optimization: Turn kprobes optimization ON/OFF.
+
+When CONFIG_OPTPROBES=y, this sysctl interface appears and it provides
+a knob to globally and forcibly turn jump optimization (see section
+1.4) ON or OFF. By default, jump optimization is allowed (ON).
+If you echo "0" to this file or set "debug.kprobes_optimization" to
+0 via sysctl, all optimized probes will be unoptimized, and any new
+probes registered after that will not be optimized.  Note that this
+knob *changes* the optimized state. This means that optimized probes
+(marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
+removed). If the knob is turned on, they will be optimized again.
+
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 2811e45..c6416a3 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -23,12 +23,12 @@
    Only run vcpu ioctls from the same thread that was used to create the
    vcpu.
 
-2. File descritpors
+2. File descriptors
 
 The kvm API is centered around file descriptors.  An initial
 open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
-handle will create a VM file descripror which can be used to issue VM
+handle will create a VM file descriptor which can be used to issue VM
 ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
 and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
 fd can be used to control the vcpu, including the important task of
@@ -643,7 +643,7 @@
 Parameters: struct kvm_clock_data (in)
 Returns: 0 on success, -1 on error
 
-Sets the current timestamp of kvmclock to the valued specific in its parameter.
+Sets the current timestamp of kvmclock to the value specified in its parameter.
 In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
@@ -795,11 +795,11 @@
 			__u64 data_offset; /* relative to kvm_run start */
 		} io;
 
-If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_IO, then the vcpu has
 executed a port I/O instruction which could not be satisfied by kvm.
 data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 where kvm expects application code to place the data for the next
-KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a patcked array.
+KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
 
 		struct {
 			struct kvm_debug_exit_arch arch;
@@ -815,7 +815,7 @@
 			__u8  is_write;
 		} mmio;
 
-If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
 executed a memory-mapped I/O instruction which could not be satisfied
 by kvm.  The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
diff --git a/Documentation/powerpc/dts-bindings/fsl/dma.txt b/Documentation/powerpc/dts-bindings/fsl/dma.txt
index 0732cdd..2a4b4bc 100644
--- a/Documentation/powerpc/dts-bindings/fsl/dma.txt
+++ b/Documentation/powerpc/dts-bindings/fsl/dma.txt
@@ -44,21 +44,29 @@
 			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 			cell-index = <0>;
 			reg = <0 0x80>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
 		};
 		dma-channel@80 {
 			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 			cell-index = <1>;
 			reg = <0x80 0x80>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
 		};
 		dma-channel@100 {
 			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 			cell-index = <2>;
 			reg = <0x100 0x80>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
 		};
 		dma-channel@180 {
 			compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
 			cell-index = <3>;
 			reg = <0x180 0x80>;
+			interrupt-parent = <&ipic>;
+			interrupts = <71 8>;
 		};
 	};
 
diff --git a/MAINTAINERS b/MAINTAINERS
index c6591bc..51d8b52 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3173,7 +3173,7 @@
 F:	arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
-M:	Hollis Blanchard <hollisb@us.ibm.com>
+M:	Alexander Graf <agraf@suse.de>
 L:	kvm-ppc@vger.kernel.org
 W:	http://kvm.qumranet.com
 S:	Supported
diff --git a/arch/Kconfig b/arch/Kconfig
index 215e460..e5eb133 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -41,6 +41,17 @@
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config OPTPROBES
+	bool "Kprobes jump optimization support (EXPERIMENTAL)"
+	default y
+	depends on KPROBES
+	depends on !PREEMPT
+	depends on HAVE_OPTPROBES
+	select KALLSYMS_ALL
+	help
+	  This option will allow kprobes to optimize breakpoint to
+	  a jump for reducing its overhead.
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	bool
 	help
@@ -83,6 +94,8 @@
 config HAVE_KRETPROBES
 	bool
 
+config HAVE_OPTPROBES
+	bool
 #
 # An arch should select this if it provides all these things:
 #
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 1a8c727..9b28f12 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -366,8 +366,7 @@
 		slot_cnt += *slots_per_op;
 	}
 
-	if (len)
-		slot_cnt += *slots_per_op;
+	slot_cnt += *slots_per_op;
 
 	return slot_cnt;
 }
@@ -389,8 +388,7 @@
 		slot_cnt += *slots_per_op;
 	}
 
-	if (len)
-		slot_cnt += *slots_per_op;
+	slot_cnt += *slots_per_op;
 
 	return slot_cnt;
 }
@@ -737,10 +735,8 @@
 			i += slots_per_op;
 		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
 
-		if (len) {
-			iter = iop_hw_desc_slot_idx(hw_desc, i);
-			iter->byte_count = len;
-		}
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		iter->byte_count = len;
 	}
 }
 
diff --git a/arch/arm/mach-u300/include/mach/coh901318.h b/arch/arm/mach-u300/include/mach/coh901318.h
index f4cfee9..b8155b4 100644
--- a/arch/arm/mach-u300/include/mach/coh901318.h
+++ b/arch/arm/mach-u300/include/mach/coh901318.h
@@ -53,7 +53,7 @@
  * struct coh_dma_channel - dma channel base
  * @name: ascii name of dma channel
  * @number: channel id number
- * @desc_nbr_max: number of preallocated descriptortors
+ * @desc_nbr_max: number of preallocated descriptors
  * @priority_high: prio of channel, 0 low otherwise high.
  * @param: configuration parameters
  * @dev_addr: physical address of periphal connected to channel
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 01c7579..fa4d1e5 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -26,6 +26,7 @@
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
 	select KVM_APIC_ARCHITECTURE
+	select KVM_MMIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5fdeec5..26e0e08 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -241,10 +241,10 @@
 	return 0;
 mmio:
 	if (p->dir)
-		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				    p->size, &p->data);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				     p->size, &p->data);
 	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
@@ -636,12 +636,9 @@
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
-	int r;
+	int r, idx;
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
 	if (signal_pending(current)) {
@@ -663,7 +660,7 @@
 	if (r < 0)
 		goto vcpu_run_fail;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	kvm_guest_enter();
 
 	/*
@@ -687,7 +684,7 @@
 	kvm_guest_exit();
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	r = kvm_handle_exit(kvm_run, vcpu);
 
@@ -697,10 +694,10 @@
 	}
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
 		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
 
@@ -971,7 +968,7 @@
 			goto out;
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
-			kfree(kvm->arch.vioapic);
+			kvm_ioapic_destroy(kvm);
 			goto out;
 		}
 		break;
@@ -1377,12 +1374,14 @@
 
 static void kvm_release_vm_pages(struct kvm *kvm)
 {
+	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 	int i, j;
 	unsigned long base_gfn;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		memslot = &kvm->memslots[i];
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++) {
+		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
 
 		for (j = 0; j < memslot->npages; j++) {
@@ -1405,6 +1404,7 @@
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
 	free_kvm(kvm);
 }
 
@@ -1576,15 +1576,15 @@
 	return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
 		struct kvm_memory_slot old,
+		struct kvm_userspace_memory_region *mem,
 		int user_alloc)
 {
 	unsigned long i;
 	unsigned long pfn;
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	int npages = memslot->npages;
 	unsigned long base_gfn = memslot->base_gfn;
 
 	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1608,6 +1608,14 @@
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
@@ -1802,7 +1810,7 @@
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -1827,6 +1835,7 @@
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	mutex_lock(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1840,12 +1849,13 @@
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 	}
 	r = 0;
 out:
+	mutex_unlock(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index e4b8231..cb548ee 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -75,7 +75,7 @@
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
 		p->u.pal_data.ret = result;
 		return ;
 	}
@@ -87,7 +87,7 @@
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
 		p->u.sal_data.ret = result;
 		return ;
 	}
@@ -322,7 +322,7 @@
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
 		index = p->u.pal_data.gr28;
 
 	return index;
@@ -646,18 +646,16 @@
 
 	p = kvm_get_exit_data(vcpu);
 
-	if (p) {
-		if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-			*in0 = p->u.sal_data.in0;
-			*in1 = p->u.sal_data.in1;
-			*in2 = p->u.sal_data.in2;
-			*in3 = p->u.sal_data.in3;
-			*in4 = p->u.sal_data.in4;
-			*in5 = p->u.sal_data.in5;
-			*in6 = p->u.sal_data.in6;
-			*in7 = p->u.sal_data.in7;
-			return ;
-		}
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		*in0 = p->u.sal_data.in0;
+		*in1 = p->u.sal_data.in1;
+		*in2 = p->u.sal_data.in2;
+		*in3 = p->u.sal_data.in3;
+		*in4 = p->u.sal_data.in4;
+		*in5 = p->u.sal_data.in5;
+		*in6 = p->u.sal_data.in6;
+		*in7 = p->u.sal_data.in7;
+		return ;
 	}
 	*in0 = 0;
 }
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 9bf55af..fb8f9f5 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -316,8 +316,8 @@
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! \
-				Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+		panic_vm(vcpu, "Unsupported MMIO access instruction! "
+				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
 				bundle.i64[0], bundle.i64[1]);
 	}
 
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index dce75b70..958815c 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -1639,8 +1639,8 @@
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
-				& vpsr.is=0\n");
+		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+				"& vpsr.is=0\n");
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index af2abe7..aadf2dd 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -97,4 +97,10 @@
 #define RESUME_HOST             RESUME_FLAG_HOST
 #define RESUME_HOST_NV          (RESUME_FLAG_HOST|RESUME_FLAG_NV)
 
+#define KVM_GUEST_MODE_NONE	0
+#define KVM_GUEST_MODE_GUEST	1
+#define KVM_GUEST_MODE_SKIP	2
+
+#define KVM_INST_FETCH_FAILED	-1
+
 #endif /* __POWERPC_KVM_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 74b7369..db7db0a 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -22,7 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
-#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_64_asm.h>
 
 struct kvmppc_slb {
 	u64 esid;
@@ -33,7 +33,8 @@
 	bool Ks;
 	bool Kp;
 	bool nx;
-	bool large;
+	bool large;	/* PTEs are 16MB */
+	bool tb;	/* 1TB segment */
 	bool class;
 };
 
@@ -69,6 +70,7 @@
 
 struct kvmppc_vcpu_book3s {
 	struct kvm_vcpu vcpu;
+	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
 	struct kvmppc_slb slb[64];
 	struct {
@@ -89,6 +91,7 @@
 	u64 vsid_next;
 	u64 vsid_max;
 	int context_id;
+	ulong prog_flags; /* flags to inject when giving a 700 trap */
 };
 
 #define CONTEXT_HOST		0
@@ -119,6 +122,10 @@
 
 extern u32 kvmppc_trampoline_lowmem;
 extern u32 kvmppc_trampoline_enter;
+extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_load_up_fpu(void);
+extern void kvmppc_load_up_altivec(void);
+extern void kvmppc_load_up_vsx(void);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
index 2e06ee8..183461b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
@@ -20,6 +20,8 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 
 #include <asm/kvm_asm.h>
@@ -55,4 +57,20 @@
 
 #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
 
+#else  /*__ASSEMBLY__ */
+
+struct kvmppc_book3s_shadow_vcpu {
+	ulong gpr[14];
+	u32 cr;
+	u32 xer;
+	ulong host_r1;
+	ulong host_r2;
+	ulong handler;
+	ulong scratch0;
+	ulong scratch1;
+	ulong vmhandler;
+};
+
+#endif /*__ASSEMBLY__ */
+
 #endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 9d497ce..7fea26f 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -52,9 +52,12 @@
 	u32 mas5;
 	u32 mas6;
 	u32 mas7;
+	u32 l1csr0;
 	u32 l1csr1;
 	u32 hid0;
 	u32 hid1;
+	u32 tlb0cfg;
+	u32 tlb1cfg;
 
 	struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1201f62..5e5bae7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -167,23 +167,40 @@
 	ulong trampoline_lowmem;
 	ulong trampoline_enter;
 	ulong highmem_handler;
+	ulong rmcall;
 	ulong host_paca_phys;
 	struct kvmppc_mmu mmu;
 #endif
 
-	u64 fpr[32];
 	ulong gpr[32];
 
+	u64 fpr[32];
+	u32 fpscr;
+
+#ifdef CONFIG_ALTIVEC
+	vector128 vr[32];
+	vector128 vscr;
+#endif
+
+#ifdef CONFIG_VSX
+	u64 vsr[32];
+#endif
+
 	ulong pc;
-	u32 cr;
 	ulong ctr;
 	ulong lr;
+
+#ifdef CONFIG_BOOKE
 	ulong xer;
+	u32 cr;
+#endif
 
 	ulong msr;
 #ifdef CONFIG_PPC64
 	ulong shadow_msr;
+	ulong shadow_srr1;
 	ulong hflags;
+	ulong guest_owned_ext;
 #endif
 	u32 mmucr;
 	ulong sprg0;
@@ -242,6 +259,8 @@
 #endif
 	ulong fault_dear;
 	ulong fault_esr;
+	ulong queued_dear;
+	ulong queued_esr;
 	gpa_t paddr_accessed;
 
 	u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 269ee46..e264282 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,9 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/kvm_book3s.h>
+#endif
 
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
@@ -80,8 +83,9 @@
 
 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 
@@ -95,4 +99,81 @@
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_PPC_BOOK3S
+
+/* We assume we're always acting on the current vcpu */
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	if ( num < 14 ) {
+		get_paca()->shadow_vcpu.gpr[num] = val;
+		to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
+	} else
+		vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	if ( num < 14 )
+		return get_paca()->shadow_vcpu.gpr[num];
+	else
+		return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	get_paca()->shadow_vcpu.cr = val;
+	to_book3s(vcpu)->shadow_vcpu.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return get_paca()->shadow_vcpu.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+	get_paca()->shadow_vcpu.xer = val;
+	to_book3s(vcpu)->shadow_vcpu.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return get_paca()->shadow_vcpu.xer;
+}
+
+#else
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.xer;
+}
+
+#endif
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 5e9b4ef..d8a6931 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -19,6 +19,9 @@
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/exception-64e.h>
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_64_asm.h>
+#endif
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -135,6 +138,8 @@
 		u64     esid;
 		u64     vsid;
 	} kvm_slb[64];			/* guest SLB */
+	/* We use this to store guest state in */
+	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 	u8 kvm_slb_max;			/* highest used guest slb entry */
 	u8 kvm_in_guest;		/* are we inside the guest? */
 #endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bc8dd53..5572e86 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -426,6 +426,10 @@
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
+#define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
+#define   SRR1_PROGTRAP		0x00020000 /* Trap */
+#define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a6c2b63..957ceb7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -194,6 +194,30 @@
 	DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
 	DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
 	DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
+	DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
+	DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
+	DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
+	DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
+	DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
+	DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
+	DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
+	DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
+	DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
+	DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
+	DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
+	DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
+	DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
+	DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
+	DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
+	DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
+	DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
+	DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
+	DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
+					    shadow_vcpu.vmhandler));
+	DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
+					   shadow_vcpu.scratch0));
+	DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
+					   shadow_vcpu.scratch1));
 #endif
 #endif /* CONFIG_PPC64 */
 
@@ -389,8 +413,6 @@
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
-	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
-	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
 	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
 	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
 	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
@@ -411,11 +433,16 @@
 	DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
 	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
 	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
 	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
 	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
+	DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
-#endif
+#else
+	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+#endif /* CONFIG_PPC64 */
 #endif
 #ifdef CONFIG_44x
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 42545145..ab3e392 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -107,6 +107,7 @@
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 EXPORT_SYMBOL(giveup_spe);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 61af58f..65ea083 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -65,13 +65,14 @@
 			 */
 			switch (dcrn) {
 			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
 				break;
 			case DCRN_CPR0_CONFIG_DATA:
 				local_irq_disable();
 				mtdcr(DCRN_CPR0_CONFIG_ADDR,
 					  vcpu->arch.cpr0_cfgaddr);
-				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+				kvmppc_set_gpr(vcpu, rt,
+					       mfdcr(DCRN_CPR0_CONFIG_DATA));
 				local_irq_enable();
 				break;
 			default:
@@ -93,11 +94,11 @@
 			/* emulate some access in kernel */
 			switch (dcrn) {
 			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+				vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
 				break;
 			default:
 				run->dcr.dcrn = dcrn;
-				run->dcr.data = vcpu->arch.gpr[rs];
+				run->dcr.data = kvmppc_get_gpr(vcpu, rs);
 				run->dcr.is_write = 1;
 				vcpu->arch.dcr_needed = 1;
 				kvmppc_account_exit(vcpu, DCR_EXITS);
@@ -146,13 +147,13 @@
 
 	switch (sprn) {
 	case SPRN_PID:
-		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+		kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
 	case SPRN_MMUCR:
-		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
 	case SPRN_CCR0:
-		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
 	case SPRN_CCR1:
-		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
 	default:
 		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
@@ -167,13 +168,13 @@
 
 	switch (sprn) {
 	case SPRN_PID:
-		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
 	case SPRN_MMUCR:
-		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
 	case SPRN_CCR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
 	case SPRN_CCR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
 	default:
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ff3cb63..2570fcc 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -439,7 +439,7 @@
 	struct kvmppc_44x_tlbe *tlbe;
 	unsigned int gtlb_index;
 
-	gtlb_index = vcpu->arch.gpr[ra];
+	gtlb_index = kvmppc_get_gpr(vcpu, ra);
 	if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
 		printk("%s: index %d\n", __func__, gtlb_index);
 		kvmppc_dump_vcpu(vcpu);
@@ -455,15 +455,15 @@
 	switch (ws) {
 	case PPC44x_TLB_PAGEID:
 		tlbe->tid = get_mmucr_stid(vcpu);
-		tlbe->word0 = vcpu->arch.gpr[rs];
+		tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	case PPC44x_TLB_XLAT:
-		tlbe->word1 = vcpu->arch.gpr[rs];
+		tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = vcpu->arch.gpr[rs];
+		tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	default:
@@ -500,18 +500,20 @@
 	unsigned int as = get_mmucr_sts(vcpu);
 	unsigned int pid = get_mmucr_stid(vcpu);
 
-	ea = vcpu->arch.gpr[rb];
+	ea = kvmppc_get_gpr(vcpu, rb);
 	if (ra)
-		ea += vcpu->arch.gpr[ra];
+		ea += kvmppc_get_gpr(vcpu, ra);
 
 	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
 	if (rc) {
+		u32 cr = kvmppc_get_cr(vcpu);
+
 		if (gtlb_index < 0)
-			vcpu->arch.cr &= ~0x20000000;
+			kvmppc_set_cr(vcpu, cr & ~0x20000000);
 		else
-			vcpu->arch.cr |= 0x20000000;
+			kvmppc_set_cr(vcpu, cr | 0x20000000);
 	}
-	vcpu->arch.gpr[rt] = gtlb_index;
+	kvmppc_set_gpr(vcpu, rt, gtlb_index);
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
 	return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index fe037fd..60624cc 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@
 	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select KVM_MMIO
 
 config KVM_BOOK3S_64_HANDLER
 	bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3e294bd..9a271f0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -33,12 +33,9 @@
 
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
+/* #define DEBUG_EXT */
 
-/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0.
- * When set, we retrigger a DEC interrupt after that if DEC <= 0.
- * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
-
-/* #define AGGRESSIVE_DEC */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exits",       VCPU_STAT(sum_exits) },
@@ -72,16 +69,24 @@
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
+	memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
+	       sizeof(get_paca()->shadow_vcpu));
 	get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
+	memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+	       sizeof(get_paca()->shadow_vcpu));
 	to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+
+	kvmppc_giveup_ext(vcpu, MSR_FP);
+	kvmppc_giveup_ext(vcpu, MSR_VEC);
+	kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
-#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
+#if defined(EXIT_DEBUG)
 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
 {
 	u64 jd = mftb() - vcpu->arch.dec_jiffies;
@@ -89,6 +94,23 @@
 }
 #endif
 
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.shadow_msr = vcpu->arch.msr;
+	/* Guest MSR values */
+	vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
+				 MSR_BE | MSR_DE;
+	/* Process MSR values */
+	vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
+				 MSR_EE;
+	/* External providers the guest reserved */
+	vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+	/* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+	vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+#endif
+}
+
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	ulong old_msr = vcpu->arch.msr;
@@ -96,12 +118,10 @@
 #ifdef EXIT_DEBUG
 	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
+
 	msr &= to_book3s(vcpu)->msr_mask;
 	vcpu->arch.msr = msr;
-	vcpu->arch.shadow_msr = msr | MSR_USER32;
-	vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
-				   MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
-				   MSR_FE1);
+	kvmppc_recalc_shadow_msr(vcpu);
 
 	if (msr & (MSR_WE|MSR_POW)) {
 		if (!vcpu->arch.pending_exceptions) {
@@ -125,11 +145,10 @@
 	vcpu->arch.mmu.reset_msr(vcpu);
 }
 
-void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 {
 	unsigned int prio;
 
-	vcpu->stat.queue_intr++;
 	switch (vec) {
 	case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET;		break;
 	case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK;	break;
@@ -149,15 +168,31 @@
 	default:    prio = BOOK3S_IRQPRIO_MAX;			break;
 	}
 
-	set_bit(prio, &vcpu->arch.pending_exceptions);
+	return prio;
+}
+
+static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+					  unsigned int vec)
+{
+	clear_bit(kvmppc_book3s_vec2irqprio(vec),
+		  &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+	vcpu->stat.queue_intr++;
+
+	set_bit(kvmppc_book3s_vec2irqprio(vec),
+		&vcpu->arch.pending_exceptions);
 #ifdef EXIT_DEBUG
 	printk(KERN_INFO "Queueing interrupt %x\n", vec);
 #endif
 }
 
 
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
+	to_book3s(vcpu)->prog_flags = flags;
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
 }
 
@@ -171,6 +206,11 @@
 	return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
 }
 
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
@@ -181,6 +221,7 @@
 {
 	int deliver = 1;
 	int vec = 0;
+	ulong flags = 0ULL;
 
 	switch (priority) {
 	case BOOK3S_IRQPRIO_DECREMENTER:
@@ -214,6 +255,7 @@
 		break;
 	case BOOK3S_IRQPRIO_PROGRAM:
 		vec = BOOK3S_INTERRUPT_PROGRAM;
+		flags = to_book3s(vcpu)->prog_flags;
 		break;
 	case BOOK3S_IRQPRIO_VSX:
 		vec = BOOK3S_INTERRUPT_VSX;
@@ -244,7 +286,7 @@
 #endif
 
 	if (deliver)
-		kvmppc_inject_interrupt(vcpu, vec, 0ULL);
+		kvmppc_inject_interrupt(vcpu, vec, flags);
 
 	return deliver;
 }
@@ -254,21 +296,15 @@
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
 	unsigned int priority;
 
-	/* XXX be more clever here - no need to mftb() on every entry */
-	/* Issue DEC again if it's still active */
-#ifdef AGGRESSIVE_DEC
-	if (vcpu->arch.msr & MSR_EE)
-		if (kvmppc_get_dec(vcpu) & 0x80000000)
-			kvmppc_core_queue_dec(vcpu);
-#endif
-
 #ifdef EXIT_DEBUG
 	if (vcpu->arch.pending_exceptions)
 		printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
 #endif
 	priority = __ffs(*pending);
 	while (priority <= (sizeof(unsigned int) * 8)) {
-		if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) {
+		if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
+		    (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
+			/* DEC interrupts get cleared by mtdec */
 			clear_bit(priority, &vcpu->arch.pending_exceptions);
 			break;
 		}
@@ -503,14 +539,14 @@
 		/* Page not found in guest PTE entries */
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
-		vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+		vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EPERM) {
 		/* Storage protection */
 		vcpu->arch.dear = vcpu->arch.fault_dear;
 		to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
 		to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
-		vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+		vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EINVAL) {
 		/* Page not found in guest SLB */
@@ -532,13 +568,122 @@
 		r = kvmppc_emulate_mmio(run, vcpu);
 		if ( r == RESUME_HOST_NV )
 			r = RESUME_HOST;
-		if ( r == RESUME_GUEST_NV )
-			r = RESUME_GUEST;
 	}
 
 	return r;
 }
 
+static inline int get_fpr_index(int i)
+{
+#ifdef CONFIG_VSX
+	i *= 2;
+#endif
+	return i;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+	struct thread_struct *t = &current->thread;
+	u64 *vcpu_fpr = vcpu->arch.fpr;
+	u64 *vcpu_vsx = vcpu->arch.vsr;
+	u64 *thread_fpr = (u64*)t->fpr;
+	int i;
+
+	if (!(vcpu->arch.guest_owned_ext & msr))
+		return;
+
+#ifdef DEBUG_EXT
+	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+	switch (msr) {
+	case MSR_FP:
+		giveup_fpu(current);
+		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
+
+		vcpu->arch.fpscr = t->fpscr.val;
+		break;
+	case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+		giveup_altivec(current);
+		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
+		vcpu->arch.vscr = t->vscr;
+#endif
+		break;
+	case MSR_VSX:
+#ifdef CONFIG_VSX
+		__giveup_vsx(current);
+		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+			vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+		break;
+	default:
+		BUG();
+	}
+
+	vcpu->arch.guest_owned_ext &= ~msr;
+	current->thread.regs->msr &= ~msr;
+	kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+			     ulong msr)
+{
+	struct thread_struct *t = &current->thread;
+	u64 *vcpu_fpr = vcpu->arch.fpr;
+	u64 *vcpu_vsx = vcpu->arch.vsr;
+	u64 *thread_fpr = (u64*)t->fpr;
+	int i;
+
+	if (!(vcpu->arch.msr & msr)) {
+		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+		return RESUME_GUEST;
+	}
+
+#ifdef DEBUG_EXT
+	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+	current->thread.regs->msr |= msr;
+
+	switch (msr) {
+	case MSR_FP:
+		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
+
+		t->fpscr.val = vcpu->arch.fpscr;
+		t->fpexc_mode = 0;
+		kvmppc_load_up_fpu();
+		break;
+	case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+		memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+		t->vscr = vcpu->arch.vscr;
+		t->vrsave = -1;
+		kvmppc_load_up_altivec();
+#endif
+		break;
+	case MSR_VSX:
+#ifdef CONFIG_VSX
+		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+		kvmppc_load_up_vsx();
+#endif
+		break;
+	default:
+		BUG();
+	}
+
+	vcpu->arch.guest_owned_ext |= msr;
+
+	kvmppc_recalc_shadow_msr(vcpu);
+
+	return RESUME_GUEST;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int exit_nr)
 {
@@ -563,7 +708,7 @@
 	case BOOK3S_INTERRUPT_INST_STORAGE:
 		vcpu->stat.pf_instruc++;
 		/* only care about PTEG not found errors, but leave NX alone */
-		if (vcpu->arch.shadow_msr & 0x40000000) {
+		if (vcpu->arch.shadow_srr1 & 0x40000000) {
 			r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
 			vcpu->stat.sp_instruc++;
 		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -575,7 +720,7 @@
 			 */
 			kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
 		} else {
-			vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
+			vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
 			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 			kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
 			r = RESUME_GUEST;
@@ -621,6 +766,9 @@
 	case BOOK3S_INTERRUPT_PROGRAM:
 	{
 		enum emulation_result er;
+		ulong flags;
+
+		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
 		if (vcpu->arch.msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -628,7 +776,7 @@
 #endif
 			if ((vcpu->arch.last_inst & 0xff0007ff) !=
 			    (INS_DCBZ & 0xfffffff7)) {
-				kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+				kvmppc_core_queue_program(vcpu, flags);
 				r = RESUME_GUEST;
 				break;
 			}
@@ -638,12 +786,12 @@
 		er = kvmppc_emulate_instruction(run, vcpu);
 		switch (er) {
 		case EMULATE_DONE:
-			r = RESUME_GUEST;
+			r = RESUME_GUEST_NV;
 			break;
 		case EMULATE_FAIL:
 			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
-			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+			kvmppc_core_queue_program(vcpu, flags);
 			r = RESUME_GUEST;
 			break;
 		default:
@@ -653,23 +801,30 @@
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
 #ifdef EXIT_DEBUG
-		printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
+		printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
 #endif
 		vcpu->stat.syscall_exits++;
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		r = RESUME_GUEST;
 		break;
-	case BOOK3S_INTERRUPT_MACHINE_CHECK:
 	case BOOK3S_INTERRUPT_FP_UNAVAIL:
-	case BOOK3S_INTERRUPT_TRACE:
+		r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
+		break;
 	case BOOK3S_INTERRUPT_ALTIVEC:
+		r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
+		break;
 	case BOOK3S_INTERRUPT_VSX:
+		r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
+		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+	case BOOK3S_INTERRUPT_TRACE:
 		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
 		r = RESUME_GUEST;
 		break;
 	default:
 		/* Ugh - bork here! What did we get? */
-		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
+		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+			exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
 		r = RESUME_HOST;
 		BUG();
 		break;
@@ -712,10 +867,10 @@
 	int i;
 
 	regs->pc = vcpu->arch.pc;
-	regs->cr = vcpu->arch.cr;
+	regs->cr = kvmppc_get_cr(vcpu);
 	regs->ctr = vcpu->arch.ctr;
 	regs->lr = vcpu->arch.lr;
-	regs->xer = vcpu->arch.xer;
+	regs->xer = kvmppc_get_xer(vcpu);
 	regs->msr = vcpu->arch.msr;
 	regs->srr0 = vcpu->arch.srr0;
 	regs->srr1 = vcpu->arch.srr1;
@@ -729,7 +884,7 @@
 	regs->sprg7 = vcpu->arch.sprg6;
 
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-		regs->gpr[i] = vcpu->arch.gpr[i];
+		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
 	return 0;
 }
@@ -739,10 +894,10 @@
 	int i;
 
 	vcpu->arch.pc = regs->pc;
-	vcpu->arch.cr = regs->cr;
+	kvmppc_set_cr(vcpu, regs->cr);
 	vcpu->arch.ctr = regs->ctr;
 	vcpu->arch.lr = regs->lr;
-	vcpu->arch.xer = regs->xer;
+	kvmppc_set_xer(vcpu, regs->xer);
 	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.srr0 = regs->srr0;
 	vcpu->arch.srr1 = regs->srr1;
@@ -754,8 +909,8 @@
 	vcpu->arch.sprg6 = regs->sprg5;
 	vcpu->arch.sprg7 = regs->sprg6;
 
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-		vcpu->arch.gpr[i] = regs->gpr[i];
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
 	return 0;
 }
@@ -850,7 +1005,7 @@
 	int is_dirty = 0;
 	int r, n;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
@@ -858,7 +1013,7 @@
 
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 
 		ga = memslot->base_gfn << PAGE_SHIFT;
 		ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -872,7 +1027,7 @@
 
 	r = 0;
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -910,6 +1065,7 @@
 	vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
 	vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
 	vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+	vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
 
 	vcpu->arch.shadow_msr = MSR_USER64;
 
@@ -943,6 +1099,10 @@
 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
+	struct thread_struct ext_bkp;
+	bool save_vec = current->thread.used_vr;
+	bool save_vsx = current->thread.used_vsr;
+	ulong ext_msr;
 
 	/* No need to go into the guest when all we do is going out */
 	if (signal_pending(current)) {
@@ -950,6 +1110,35 @@
 		return -EINTR;
 	}
 
+	/* Save FPU state in stack */
+	if (current->thread.regs->msr & MSR_FP)
+		giveup_fpu(current);
+	memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
+	ext_bkp.fpscr = current->thread.fpscr;
+	ext_bkp.fpexc_mode = current->thread.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+	/* Save Altivec state in stack */
+	if (save_vec) {
+		if (current->thread.regs->msr & MSR_VEC)
+			giveup_altivec(current);
+		memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
+		ext_bkp.vscr = current->thread.vscr;
+		ext_bkp.vrsave = current->thread.vrsave;
+	}
+	ext_bkp.used_vr = current->thread.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+	/* Save VSX state in stack */
+	if (save_vsx && (current->thread.regs->msr & MSR_VSX))
+			__giveup_vsx(current);
+	ext_bkp.used_vsr = current->thread.used_vsr;
+#endif
+
+	/* Remember the MSR with disabled extensions */
+	ext_msr = current->thread.regs->msr;
+
 	/* XXX we get called with irq disabled - change that! */
 	local_irq_enable();
 
@@ -957,6 +1146,32 @@
 
 	local_irq_disable();
 
+	current->thread.regs->msr = ext_msr;
+
+	/* Make sure we save the guest FPU/Altivec/VSX state */
+	kvmppc_giveup_ext(vcpu, MSR_FP);
+	kvmppc_giveup_ext(vcpu, MSR_VEC);
+	kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+	/* Restore FPU state from stack */
+	memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
+	current->thread.fpscr = ext_bkp.fpscr;
+	current->thread.fpexc_mode = ext_bkp.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+	/* Restore Altivec state from stack */
+	if (save_vec && current->thread.used_vr) {
+		memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
+		current->thread.vscr = ext_bkp.vscr;
+		current->thread.vrsave= ext_bkp.vrsave;
+	}
+	current->thread.used_vr = ext_bkp.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+	current->thread.used_vsr = ext_bkp.used_vsr;
+#endif
+
 	return ret;
 }
 
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c
index 1027eac..2b0ee7e 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -65,11 +65,11 @@
 	case 31:
 		switch (get_xop(inst)) {
 		case OP_31_XOP_MFMSR:
-			vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr;
+			kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
 			break;
 		case OP_31_XOP_MTMSRD:
 		{
-			ulong rs = vcpu->arch.gpr[get_rs(inst)];
+			ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
 			if (inst & 0x10000) {
 				vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
 				vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
@@ -78,30 +78,30 @@
 			break;
 		}
 		case OP_31_XOP_MTMSR:
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]);
+			kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
 			break;
 		case OP_31_XOP_MFSRIN:
 		{
 			int srnum;
 
-			srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf;
+			srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
 			if (vcpu->arch.mmu.mfsrin) {
 				u32 sr;
 				sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
-				vcpu->arch.gpr[get_rt(inst)] = sr;
+				kvmppc_set_gpr(vcpu, get_rt(inst), sr);
 			}
 			break;
 		}
 		case OP_31_XOP_MTSRIN:
 			vcpu->arch.mmu.mtsrin(vcpu,
-				(vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf,
-				vcpu->arch.gpr[get_rs(inst)]);
+				(kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
+				kvmppc_get_gpr(vcpu, get_rs(inst)));
 			break;
 		case OP_31_XOP_TLBIE:
 		case OP_31_XOP_TLBIEL:
 		{
 			bool large = (inst & 0x00200000) ? true : false;
-			ulong addr = vcpu->arch.gpr[get_rb(inst)];
+			ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
 			vcpu->arch.mmu.tlbie(vcpu, addr, large);
 			break;
 		}
@@ -111,14 +111,16 @@
 			if (!vcpu->arch.mmu.slbmte)
 				return EMULATE_FAIL;
 
-			vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)],
-						vcpu->arch.gpr[get_rb(inst)]);
+			vcpu->arch.mmu.slbmte(vcpu,
+					kvmppc_get_gpr(vcpu, get_rs(inst)),
+					kvmppc_get_gpr(vcpu, get_rb(inst)));
 			break;
 		case OP_31_XOP_SLBIE:
 			if (!vcpu->arch.mmu.slbie)
 				return EMULATE_FAIL;
 
-			vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]);
+			vcpu->arch.mmu.slbie(vcpu,
+					kvmppc_get_gpr(vcpu, get_rb(inst)));
 			break;
 		case OP_31_XOP_SLBIA:
 			if (!vcpu->arch.mmu.slbia)
@@ -132,9 +134,9 @@
 			} else {
 				ulong t, rb;
 
-				rb = vcpu->arch.gpr[get_rb(inst)];
+				rb = kvmppc_get_gpr(vcpu, get_rb(inst));
 				t = vcpu->arch.mmu.slbmfee(vcpu, rb);
-				vcpu->arch.gpr[get_rt(inst)] = t;
+				kvmppc_set_gpr(vcpu, get_rt(inst), t);
 			}
 			break;
 		case OP_31_XOP_SLBMFEV:
@@ -143,20 +145,20 @@
 			} else {
 				ulong t, rb;
 
-				rb = vcpu->arch.gpr[get_rb(inst)];
+				rb = kvmppc_get_gpr(vcpu, get_rb(inst));
 				t = vcpu->arch.mmu.slbmfev(vcpu, rb);
-				vcpu->arch.gpr[get_rt(inst)] = t;
+				kvmppc_set_gpr(vcpu, get_rt(inst), t);
 			}
 			break;
 		case OP_31_XOP_DCBZ:
 		{
-			ulong rb =  vcpu->arch.gpr[get_rb(inst)];
+			ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
 			ulong ra = 0;
 			ulong addr;
 			u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
 			if (get_ra(inst))
-				ra = vcpu->arch.gpr[get_ra(inst)];
+				ra = kvmppc_get_gpr(vcpu, get_ra(inst));
 
 			addr = (ra + rb) & ~31ULL;
 			if (!(vcpu->arch.msr & MSR_SF))
@@ -233,43 +235,44 @@
 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
 	int emulated = EMULATE_DONE;
+	ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
 	switch (sprn) {
 	case SPRN_SDR1:
-		to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->sdr1 = spr_val;
 		break;
 	case SPRN_DSISR:
-		to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->dsisr = spr_val;
 		break;
 	case SPRN_DAR:
-		vcpu->arch.dear = vcpu->arch.gpr[rs];
+		vcpu->arch.dear = spr_val;
 		break;
 	case SPRN_HIOR:
-		to_book3s(vcpu)->hior = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hior = spr_val;
 		break;
 	case SPRN_IBAT0U ... SPRN_IBAT3L:
 	case SPRN_IBAT4U ... SPRN_IBAT7L:
 	case SPRN_DBAT0U ... SPRN_DBAT3L:
 	case SPRN_DBAT4U ... SPRN_DBAT7L:
-		kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
+		kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
 		/* BAT writes happen so rarely that we're ok to flush
 		 * everything here */
 		kvmppc_mmu_pte_flush(vcpu, 0, 0);
 		break;
 	case SPRN_HID0:
-		to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hid[0] = spr_val;
 		break;
 	case SPRN_HID1:
-		to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hid[1] = spr_val;
 		break;
 	case SPRN_HID2:
-		to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hid[2] = spr_val;
 		break;
 	case SPRN_HID4:
-		to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hid[4] = spr_val;
 		break;
 	case SPRN_HID5:
-		to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs];
+		to_book3s(vcpu)->hid[5] = spr_val;
 		/* guest HID5 set can change is_dcbz32 */
 		if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
 		    (mfmsr() & MSR_HV))
@@ -299,38 +302,38 @@
 
 	switch (sprn) {
 	case SPRN_SDR1:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1;
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
 		break;
 	case SPRN_DSISR:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr;
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
 		break;
 	case SPRN_DAR:
-		vcpu->arch.gpr[rt] = vcpu->arch.dear;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
 		break;
 	case SPRN_HIOR:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior;
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
 		break;
 	case SPRN_HID0:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0];
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
 		break;
 	case SPRN_HID1:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1];
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
 		break;
 	case SPRN_HID2:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2];
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
 		break;
 	case SPRN_HID4:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4];
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
 		break;
 	case SPRN_HID5:
-		vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5];
+		kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
 		break;
 	case SPRN_THRM1:
 	case SPRN_THRM2:
 	case SPRN_THRM3:
 	case SPRN_CTRLF:
 	case SPRN_CTRLT:
-		vcpu->arch.gpr[rt] = 0;
+		kvmppc_set_gpr(vcpu, rt, 0);
 		break;
 	default:
 		printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_64_exports.c
index 5b2db38..1dd5a1d 100644
--- a/arch/powerpc/kvm/book3s_64_exports.c
+++ b/arch/powerpc/kvm/book3s_64_exports.c
@@ -22,3 +22,11 @@
 
 EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
 EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
+EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
+#endif
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S
index 7b55d80..c1584d0 100644
--- a/arch/powerpc/kvm/book3s_64_interrupts.S
+++ b/arch/powerpc/kvm/book3s_64_interrupts.S
@@ -28,11 +28,6 @@
 #define ULONG_SIZE 8
 #define VCPU_GPR(n)     (VCPU_GPRS + (n * ULONG_SIZE))
 
-.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
-	ld	\tmp_reg, (PACA_EXMC+\offset)(r13)
-	std	\tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
-.endm
-
 .macro DISABLE_INTERRUPTS
        mfmsr   r0
        rldicl  r0,r0,48,1
@@ -40,6 +35,26 @@
        mtmsrd  r0,1
 .endm
 
+#define VCPU_LOAD_NVGPRS(vcpu) \
+	ld	r14, VCPU_GPR(r14)(vcpu); \
+	ld	r15, VCPU_GPR(r15)(vcpu); \
+	ld	r16, VCPU_GPR(r16)(vcpu); \
+	ld	r17, VCPU_GPR(r17)(vcpu); \
+	ld	r18, VCPU_GPR(r18)(vcpu); \
+	ld	r19, VCPU_GPR(r19)(vcpu); \
+	ld	r20, VCPU_GPR(r20)(vcpu); \
+	ld	r21, VCPU_GPR(r21)(vcpu); \
+	ld	r22, VCPU_GPR(r22)(vcpu); \
+	ld	r23, VCPU_GPR(r23)(vcpu); \
+	ld	r24, VCPU_GPR(r24)(vcpu); \
+	ld	r25, VCPU_GPR(r25)(vcpu); \
+	ld	r26, VCPU_GPR(r26)(vcpu); \
+	ld	r27, VCPU_GPR(r27)(vcpu); \
+	ld	r28, VCPU_GPR(r28)(vcpu); \
+	ld	r29, VCPU_GPR(r29)(vcpu); \
+	ld	r30, VCPU_GPR(r30)(vcpu); \
+	ld	r31, VCPU_GPR(r31)(vcpu); \
+
 /*****************************************************************************
  *                                                                           *
  *     Guest entry / exit code that is in kernel module memory (highmem)     *
@@ -67,61 +82,32 @@
 	SAVE_NVGPRS(r1)
 
 	/* Save LR */
-	mflr	r14
-	std	r14, _LINK(r1)
-
-/* XXX optimize non-volatile loading away */
-kvm_start_lightweight:
-
-	DISABLE_INTERRUPTS
-
-	/* Save R1/R2 in the PACA */
-	std	r1, PACAR1(r13)
-	std	r2, (PACA_EXMC+EX_SRR0)(r13)
-	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
-	std	r3, PACASAVEDMSR(r13)
+	std	r0, _LINK(r1)
 
 	/* Load non-volatile guest state from the vcpu */
-	ld	r14, VCPU_GPR(r14)(r4)
-	ld	r15, VCPU_GPR(r15)(r4)
-	ld	r16, VCPU_GPR(r16)(r4)
-	ld	r17, VCPU_GPR(r17)(r4)
-	ld	r18, VCPU_GPR(r18)(r4)
-	ld	r19, VCPU_GPR(r19)(r4)
-	ld	r20, VCPU_GPR(r20)(r4)
-	ld	r21, VCPU_GPR(r21)(r4)
-	ld	r22, VCPU_GPR(r22)(r4)
-	ld	r23, VCPU_GPR(r23)(r4)
-	ld	r24, VCPU_GPR(r24)(r4)
-	ld	r25, VCPU_GPR(r25)(r4)
-	ld	r26, VCPU_GPR(r26)(r4)
-	ld	r27, VCPU_GPR(r27)(r4)
-	ld	r28, VCPU_GPR(r28)(r4)
-	ld	r29, VCPU_GPR(r29)(r4)
-	ld	r30, VCPU_GPR(r30)(r4)
-	ld	r31, VCPU_GPR(r31)(r4)
+	VCPU_LOAD_NVGPRS(r4)
+
+	/* Save R1/R2 in the PACA */
+	std	r1, PACA_KVM_HOST_R1(r13)
+	std	r2, PACA_KVM_HOST_R2(r13)
+
+	/* XXX swap in/out on load? */
+	ld	r3, VCPU_HIGHMEM_HANDLER(r4)
+	std	r3, PACA_KVM_VMHANDLER(r13)
+
+kvm_start_lightweight:
 
 	ld	r9, VCPU_PC(r4)			/* r9 = vcpu->arch.pc */
 	ld	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
 
-	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
-	mtsrr0	r3
-
-	LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-	mtsrr1	r3
-
-	/* Load guest state in the respective registers */
-	lwz	r3, VCPU_CR(r4)		/* r3 = vcpu->arch.cr */
-	stw	r3, (PACA_EXMC + EX_CCR)(r13)
-
-	ld	r3, VCPU_CTR(r4)	/* r3 = vcpu->arch.ctr */
-	mtctr	r3			/* CTR = r3 */
+	/* Load some guest state in the respective registers */
+	ld	r5, VCPU_CTR(r4)	/* r5 = vcpu->arch.ctr */
+					/* will be swapped in by rmcall */
 
 	ld	r3, VCPU_LR(r4)		/* r3 = vcpu->arch.lr */
 	mtlr	r3			/* LR = r3 */
 
-	ld	r3, VCPU_XER(r4)	/* r3 = vcpu->arch.xer */
-	std	r3, (PACA_EXMC + EX_R3)(r13)
+	DISABLE_INTERRUPTS
 
 	/* Some guests may need to have dcbz set to 32 byte length.
 	 *
@@ -141,36 +127,15 @@
 	mtspr   SPRN_HID5,r3
 
 no_dcbz32_on:
-	/*	Load guest GPRs */
 
-	ld	r3, VCPU_GPR(r9)(r4)
-	std	r3, (PACA_EXMC + EX_R9)(r13)
-	ld	r3, VCPU_GPR(r10)(r4)
-	std	r3, (PACA_EXMC + EX_R10)(r13)
-	ld	r3, VCPU_GPR(r11)(r4)
-	std	r3, (PACA_EXMC + EX_R11)(r13)
-	ld	r3, VCPU_GPR(r12)(r4)
-	std	r3, (PACA_EXMC + EX_R12)(r13)
-	ld	r3, VCPU_GPR(r13)(r4)
-	std	r3, (PACA_EXMC + EX_R13)(r13)
+	ld	r6, VCPU_RMCALL(r4)
+	mtctr	r6
 
-	ld	r0, VCPU_GPR(r0)(r4)
-	ld	r1, VCPU_GPR(r1)(r4)
-	ld	r2, VCPU_GPR(r2)(r4)
-	ld	r3, VCPU_GPR(r3)(r4)
-	ld	r5, VCPU_GPR(r5)(r4)
-	ld	r6, VCPU_GPR(r6)(r4)
-	ld	r7, VCPU_GPR(r7)(r4)
-	ld	r8, VCPU_GPR(r8)(r4)
-	ld	r4, VCPU_GPR(r4)(r4)
-
-	/* This sets the Magic value for the trampoline */
-
-	li	r11, 1
-	stb	r11, PACA_KVM_IN_GUEST(r13)
+	ld	r3, VCPU_TRAMPOLINE_ENTER(r4)
+	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
 
 	/* Jump to SLB patching handlder and into our guest */
-	RFI
+	bctr
 
 /*
  * This is the handler in module memory. It gets jumped at from the
@@ -184,125 +149,70 @@
 	/*
 	 * Register usage at this point:
 	 *
-	 * R00   = guest R13
-	 * R01   = host R1
-	 * R02   = host R2
-	 * R10   = guest PC
-	 * R11   = guest MSR
-	 * R12   = exit handler id
-	 * R13   = PACA
-	 * PACA.exmc.R9    = guest R1
-	 * PACA.exmc.R10   = guest R10
-	 * PACA.exmc.R11   = guest R11
-	 * PACA.exmc.R12   = guest R12
-	 * PACA.exmc.R13   = guest R2
-	 * PACA.exmc.DAR   = guest DAR
-	 * PACA.exmc.DSISR = guest DSISR
-	 * PACA.exmc.LR    = guest instruction
-	 * PACA.exmc.CCR   = guest CR
-	 * PACA.exmc.SRR0  = guest R0
+	 * R0         = guest last inst
+	 * R1         = host R1
+	 * R2         = host R2
+	 * R3         = guest PC
+	 * R4         = guest MSR
+	 * R5         = guest DAR
+	 * R6         = guest DSISR
+	 * R13        = PACA
+	 * PACA.KVM.* = guest *
 	 *
 	 */
 
-	std	r3, (PACA_EXMC+EX_R3)(r13)
-
-	/* save the exit id in R3 */
-	mr	r3, r12
-
-	/* R12 = vcpu */
-	ld	r12, GPR4(r1)
+	/* R7 = vcpu */
+	ld	r7, GPR4(r1)
 
 	/* Now save the guest state */
 
-	std	r0, VCPU_GPR(r13)(r12)
-	std	r4, VCPU_GPR(r4)(r12)
-	std	r5, VCPU_GPR(r5)(r12)
-	std	r6, VCPU_GPR(r6)(r12)
-	std	r7, VCPU_GPR(r7)(r12)
-	std	r8, VCPU_GPR(r8)(r12)
-	std	r9, VCPU_GPR(r9)(r12)
+	stw	r0, VCPU_LAST_INST(r7)
 
-	/* get registers from PACA */
-	mfpaca	r5, r0, EX_SRR0, r12
-	mfpaca	r5, r3, EX_R3, r12
-	mfpaca	r5, r1, EX_R9, r12
-	mfpaca	r5, r10, EX_R10, r12
-	mfpaca	r5, r11, EX_R11, r12
-	mfpaca	r5, r12, EX_R12, r12
-	mfpaca	r5, r2, EX_R13, r12
+	std	r3, VCPU_PC(r7)
+	std	r4, VCPU_SHADOW_SRR1(r7)
+	std	r5, VCPU_FAULT_DEAR(r7)
+	std	r6, VCPU_FAULT_DSISR(r7)
 
-	lwz	r5, (PACA_EXMC+EX_LR)(r13)
-	stw	r5, VCPU_LAST_INST(r12)
-
-	lwz	r5, (PACA_EXMC+EX_CCR)(r13)
-	stw	r5, VCPU_CR(r12)
-
-	ld	r5, VCPU_HFLAGS(r12)
+	ld	r5, VCPU_HFLAGS(r7)
 	rldicl.	r5, r5, 0, 63		/* CR = ((r5 & 1) == 0) */
 	beq	no_dcbz32_off
 
+	li	r4, 0
 	mfspr   r5,SPRN_HID5
-	rldimi  r5,r5,6,56
+	rldimi  r5,r4,6,56
 	mtspr   SPRN_HID5,r5
 
 no_dcbz32_off:
 
-	/* XXX maybe skip on lightweight? */
-	std	r14, VCPU_GPR(r14)(r12)
-	std	r15, VCPU_GPR(r15)(r12)
-	std	r16, VCPU_GPR(r16)(r12)
-	std	r17, VCPU_GPR(r17)(r12)
-	std	r18, VCPU_GPR(r18)(r12)
-	std	r19, VCPU_GPR(r19)(r12)
-	std	r20, VCPU_GPR(r20)(r12)
-	std	r21, VCPU_GPR(r21)(r12)
-	std	r22, VCPU_GPR(r22)(r12)
-	std	r23, VCPU_GPR(r23)(r12)
-	std	r24, VCPU_GPR(r24)(r12)
-	std	r25, VCPU_GPR(r25)(r12)
-	std	r26, VCPU_GPR(r26)(r12)
-	std	r27, VCPU_GPR(r27)(r12)
-	std	r28, VCPU_GPR(r28)(r12)
-	std	r29, VCPU_GPR(r29)(r12)
-	std	r30, VCPU_GPR(r30)(r12)
-	std	r31, VCPU_GPR(r31)(r12)
+	std	r14, VCPU_GPR(r14)(r7)
+	std	r15, VCPU_GPR(r15)(r7)
+	std	r16, VCPU_GPR(r16)(r7)
+	std	r17, VCPU_GPR(r17)(r7)
+	std	r18, VCPU_GPR(r18)(r7)
+	std	r19, VCPU_GPR(r19)(r7)
+	std	r20, VCPU_GPR(r20)(r7)
+	std	r21, VCPU_GPR(r21)(r7)
+	std	r22, VCPU_GPR(r22)(r7)
+	std	r23, VCPU_GPR(r23)(r7)
+	std	r24, VCPU_GPR(r24)(r7)
+	std	r25, VCPU_GPR(r25)(r7)
+	std	r26, VCPU_GPR(r26)(r7)
+	std	r27, VCPU_GPR(r27)(r7)
+	std	r28, VCPU_GPR(r28)(r7)
+	std	r29, VCPU_GPR(r29)(r7)
+	std	r30, VCPU_GPR(r30)(r7)
+	std	r31, VCPU_GPR(r31)(r7)
 
-	/* Restore non-volatile host registers (r14 - r31) */
-	REST_NVGPRS(r1)
-
-	/* Save guest PC (R10) */
-	std	r10, VCPU_PC(r12)
-
-	/* Save guest msr (R11) */
-	std	r11, VCPU_SHADOW_MSR(r12)
-
-	/* Save guest CTR (in R12) */
+	/* Save guest CTR */
 	mfctr	r5
-	std	r5, VCPU_CTR(r12)
+	std	r5, VCPU_CTR(r7)
 
 	/* Save guest LR */
 	mflr	r5
-	std	r5, VCPU_LR(r12)
-
-	/* Save guest XER */
-	mfxer	r5
-	std	r5, VCPU_XER(r12)
-
-	/* Save guest DAR */
-	ld	r5, (PACA_EXMC+EX_DAR)(r13)
-	std	r5, VCPU_FAULT_DEAR(r12)
-
-	/* Save guest DSISR */
-	lwz	r5, (PACA_EXMC+EX_DSISR)(r13)
-	std	r5, VCPU_FAULT_DSISR(r12)
+	std	r5, VCPU_LR(r7)
 
 	/* Restore host msr -> SRR1 */
-	ld	r7, VCPU_HOST_MSR(r12)
-	mtsrr1	r7
-
-	/* Restore host IP -> SRR0 */
-	ld	r6, VCPU_HOST_RETIP(r12)
-	mtsrr0	r6
+	ld	r6, VCPU_HOST_MSR(r7)
 
 	/*
 	 * For some interrupts, we need to call the real Linux
@@ -314,13 +224,14 @@
 	 * r3 = address of interrupt handler (exit reason)
 	 */
 
-	cmpwi	r3, BOOK3S_INTERRUPT_EXTERNAL
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	beq	call_linux_handler
-	cmpwi	r3, BOOK3S_INTERRUPT_DECREMENTER
+	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
 	beq	call_linux_handler
 
-	/* Back to Interruptable Mode! (goto kvm_return_point) */
-	RFI
+	/* Back to EE=1 */
+	mtmsr	r6
+	b	kvm_return_point
 
 call_linux_handler:
 
@@ -333,16 +244,22 @@
 	 * interrupt handler!
 	 *
 	 * R3 still contains the exit code,
-	 * R6 VCPU_HOST_RETIP and
-	 * R7 VCPU_HOST_MSR
+	 * R5 VCPU_HOST_RETIP and
+	 * R6 VCPU_HOST_MSR
 	 */
 
-	mtlr	r3
+	/* Restore host IP -> SRR0 */
+	ld	r5, VCPU_HOST_RETIP(r7)
 
-	ld	r5, VCPU_TRAMPOLINE_LOWMEM(r12)
-	mtsrr0	r5
-	LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-	mtsrr1	r5
+	/* XXX Better move to a safe function?
+	 *     What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
+
+	mtlr	r12
+
+	ld	r4, VCPU_TRAMPOLINE_LOWMEM(r7)
+	mtsrr0	r4
+	LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+	mtsrr1	r3
 
 	RFI
 
@@ -351,42 +268,51 @@
 
 	/* Jump back to lightweight entry if we're supposed to */
 	/* go back into the guest */
-	mr	r5, r3
+
+	/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
+	mr	r5, r12
+
 	/* Restore r3 (kvm_run) and r4 (vcpu) */
 	REST_2GPRS(3, r1)
 	bl	KVMPPC_HANDLE_EXIT
 
-#if 0 /* XXX get lightweight exits back */
+	/* If RESUME_GUEST, get back in the loop */
 	cmpwi	r3, RESUME_GUEST
-	bne	kvm_exit_heavyweight
+	beq	kvm_loop_lightweight
 
-	/* put VCPU and KVM_RUN back into place and roll again! */
-	REST_2GPRS(3, r1)
-	b	kvm_start_lightweight
+	cmpwi	r3, RESUME_GUEST_NV
+	beq	kvm_loop_heavyweight
 
-kvm_exit_heavyweight:
-	/* Restore non-volatile host registers */
-	ld	r14, _LINK(r1)
-	mtlr	r14
-	REST_NVGPRS(r1)
+kvm_exit_loop:
 
-	addi    r1, r1, SWITCH_FRAME_SIZE
-#else
 	ld	r4, _LINK(r1)
 	mtlr	r4
 
-	cmpwi	r3, RESUME_GUEST
-	bne	kvm_exit_heavyweight
+	/* Restore non-volatile host registers (r14 - r31) */
+	REST_NVGPRS(r1)
 
+	addi    r1, r1, SWITCH_FRAME_SIZE
+	blr
+
+kvm_loop_heavyweight:
+
+	ld	r4, _LINK(r1)
+	std     r4, (16 + SWITCH_FRAME_SIZE)(r1)
+
+	/* Load vcpu and cpu_run */
 	REST_2GPRS(3, r1)
 
-	addi    r1, r1, SWITCH_FRAME_SIZE
+	/* Load non-volatile guest state from the vcpu */
+	VCPU_LOAD_NVGPRS(r4)
 
-	b	kvm_start_entry
+	/* Jump back into the beginning of this function */
+	b	kvm_start_lightweight
 
-kvm_exit_heavyweight:
+kvm_loop_lightweight:
 
-	addi    r1, r1, SWITCH_FRAME_SIZE
-#endif
+	/* We'll need the vcpu pointer */
+	REST_GPR(4, r1)
 
-	blr
+	/* Jump back into the beginning of this function */
+	b	kvm_start_lightweight
+
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index e4beeb3..512dcff 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -54,7 +54,7 @@
 		if (!vcpu_book3s->slb[i].valid)
 			continue;
 
-		if (vcpu_book3s->slb[i].large)
+		if (vcpu_book3s->slb[i].tb)
 			cmp_esid = esid_1t;
 
 		if (vcpu_book3s->slb[i].esid == cmp_esid)
@@ -65,9 +65,10 @@
 		eaddr, esid, esid_1t);
 	for (i = 0; i < vcpu_book3s->slb_nr; i++) {
 	    if (vcpu_book3s->slb[i].vsid)
-		dprintk("  %d: %c%c %llx %llx\n", i,
+		dprintk("  %d: %c%c%c %llx %llx\n", i,
 			vcpu_book3s->slb[i].valid ? 'v' : ' ',
 			vcpu_book3s->slb[i].large ? 'l' : ' ',
+			vcpu_book3s->slb[i].tb    ? 't' : ' ',
 			vcpu_book3s->slb[i].esid,
 			vcpu_book3s->slb[i].vsid);
 	}
@@ -84,7 +85,7 @@
 	if (!slb)
 		return 0;
 
-	if (slb->large)
+	if (slb->tb)
 		return (((u64)eaddr >> 12) & 0xfffffff) |
 		       (((u64)slb->vsid) << 28);
 
@@ -309,7 +310,8 @@
 	slbe = &vcpu_book3s->slb[slb_nr];
 
 	slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
-	slbe->esid  = slbe->large ? esid_1t : esid;
+	slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;
+	slbe->esid  = slbe->tb ? esid_1t : esid;
 	slbe->vsid  = rs >> 12;
 	slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
 	slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S
index fb7dd2e..c83c60a 100644
--- a/arch/powerpc/kvm/book3s_64_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S
@@ -45,36 +45,25 @@
 	 * To distinguish, we check a magic byte in the PACA
 	 */
 	mfspr	r13, SPRN_SPRG_PACA		/* r13 = PACA */
-	std	r12, (PACA_EXMC + EX_R12)(r13)
+	std	r12, PACA_KVM_SCRATCH0(r13)
 	mfcr	r12
-	stw	r12, (PACA_EXMC + EX_CCR)(r13)
+	stw	r12, PACA_KVM_SCRATCH1(r13)
 	lbz	r12, PACA_KVM_IN_GUEST(r13)
-	cmpwi	r12, 0
+	cmpwi	r12, KVM_GUEST_MODE_NONE
 	bne	..kvmppc_handler_hasmagic_\intno
 	/* No KVM guest? Then jump back to the Linux handler! */
-	lwz	r12, (PACA_EXMC + EX_CCR)(r13)
+	lwz	r12, PACA_KVM_SCRATCH1(r13)
 	mtcr	r12
-	ld	r12, (PACA_EXMC + EX_R12)(r13)
+	ld	r12, PACA_KVM_SCRATCH0(r13)
 	mfspr	r13, SPRN_SPRG_SCRATCH0		/* r13 = original r13 */
 	b	kvmppc_resume_\intno		/* Get back original handler */
 
 	/* Now we know we're handling a KVM guest */
 ..kvmppc_handler_hasmagic_\intno:
-	/* Unset guest state */
-	li	r12, 0
-	stb	r12, PACA_KVM_IN_GUEST(r13)
 
-	std	r1, (PACA_EXMC+EX_R9)(r13)
-	std	r10, (PACA_EXMC+EX_R10)(r13)
-	std	r11, (PACA_EXMC+EX_R11)(r13)
-	std	r2, (PACA_EXMC+EX_R13)(r13)
-
-	mfsrr0	r10
-	mfsrr1	r11
-
-	/* Restore R1/R2 so we can handle faults */
-	ld	r1, PACAR1(r13)
-	ld	r2, (PACA_EXMC+EX_SRR0)(r13)
+	/* Should we just skip the faulting instruction? */
+	cmpwi	r12, KVM_GUEST_MODE_SKIP
+	beq	kvmppc_handler_skip_ins
 
 	/* Let's store which interrupt we're handling */
 	li	r12, \intno
@@ -102,23 +91,107 @@
 INTERRUPT_TRAMPOLINE	BOOK3S_INTERRUPT_VSX
 
 /*
+ * Bring us back to the faulting code, but skip the
+ * faulting instruction.
+ *
+ * This is a generic exit path from the interrupt
+ * trampolines above.
+ *
+ * Input Registers:
+ *
+ * R12               = free
+ * R13               = PACA
+ * PACA.KVM.SCRATCH0 = guest R12
+ * PACA.KVM.SCRATCH1 = guest CR
+ * SPRG_SCRATCH0     = guest R13
+ *
+ */
+kvmppc_handler_skip_ins:
+
+	/* Patch the IP to the next instruction */
+	mfsrr0	r12
+	addi	r12, r12, 4
+	mtsrr0	r12
+
+	/* Clean up all state */
+	lwz	r12, PACA_KVM_SCRATCH1(r13)
+	mtcr	r12
+	ld	r12, PACA_KVM_SCRATCH0(r13)
+	mfspr	r13, SPRN_SPRG_SCRATCH0
+
+	/* And get back into the code */
+	RFI
+
+/*
  * This trampoline brings us back to a real mode handler
  *
  * Input Registers:
  *
- * R6 = SRR0
- * R7 = SRR1
+ * R5 = SRR0
+ * R6 = SRR1
  * LR = real-mode IP
  *
  */
 .global kvmppc_handler_lowmem_trampoline
 kvmppc_handler_lowmem_trampoline:
 
-	mtsrr0	r6
-	mtsrr1	r7
+	mtsrr0	r5
+	mtsrr1	r6
 	blr
 kvmppc_handler_lowmem_trampoline_end:
 
+/*
+ * Call a function in real mode
+ *
+ * Input Registers:
+ *
+ * R3 = function
+ * R4 = MSR
+ * R5 = CTR
+ *
+ */
+_GLOBAL(kvmppc_rmcall)
+	mtmsr	r4		/* Disable relocation, so mtsrr
+				   doesn't get interrupted */
+	mtctr	r5
+	mtsrr0	r3
+	mtsrr1	r4
+	RFI
+
+/*
+ * Activate current's external feature (FPU/Altivec/VSX)
+ */
+#define define_load_up(what) 				\
+							\
+_GLOBAL(kvmppc_load_up_ ## what);			\
+	subi	r1, r1, INT_FRAME_SIZE;			\
+	mflr	r3;					\
+	std	r3, _LINK(r1);				\
+	mfmsr	r4;					\
+	std	r31, GPR3(r1);				\
+	mr	r31, r4;				\
+	li	r5, MSR_DR;				\
+	oris	r5, r5, MSR_EE@h;			\
+	andc	r4, r4, r5;				\
+	mtmsr	r4;					\
+							\
+	bl	.load_up_ ## what;			\
+							\
+	mtmsr	r31;					\
+	ld	r3, _LINK(r1);				\
+	ld	r31, GPR3(r1);				\
+	addi	r1, r1, INT_FRAME_SIZE;			\
+	mtlr	r3;					\
+	blr
+
+define_load_up(fpu)
+#ifdef CONFIG_ALTIVEC
+define_load_up(altivec)
+#endif
+#ifdef CONFIG_VSX
+define_load_up(vsx)
+#endif
+
 .global kvmppc_trampoline_lowmem
 kvmppc_trampoline_lowmem:
 	.long kvmppc_handler_lowmem_trampoline - _stext
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index ecd237a..35b76272 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -31,7 +31,7 @@
 #define REBOLT_SLB_ENTRY(num) \
 	ld	r10, SHADOW_SLB_ESID(num)(r11); \
 	cmpdi	r10, 0; \
-	beq	slb_exit_skip_1; \
+	beq	slb_exit_skip_ ## num; \
 	oris	r10, r10, SLB_ESID_V@h; \
 	ld	r9, SHADOW_SLB_VSID(num)(r11); \
 	slbmte	r9, r10; \
@@ -51,23 +51,21 @@
 	 *
 	 * MSR = ~IR|DR
 	 * R13 = PACA
+	 * R1 = host R1
+	 * R2 = host R2
 	 * R9 = guest IP
 	 * R10 = guest MSR
-	 * R11 = free
-	 * R12 = free
-	 * PACA[PACA_EXMC + EX_R9] = guest R9
-	 * PACA[PACA_EXMC + EX_R10] = guest R10
-	 * PACA[PACA_EXMC + EX_R11] = guest R11
-	 * PACA[PACA_EXMC + EX_R12] = guest R12
-	 * PACA[PACA_EXMC + EX_R13] = guest R13
-	 * PACA[PACA_EXMC + EX_CCR] = guest CR
-	 * PACA[PACA_EXMC + EX_R3] = guest XER
+	 * all other GPRS = free
+	 * PACA[KVM_CR] = guest CR
+	 * PACA[KVM_XER] = guest XER
 	 */
 
 	mtsrr0	r9
 	mtsrr1	r10
 
-	mtspr	SPRN_SPRG_SCRATCH0, r0
+	/* Activate guest mode, so faults get handled by KVM */
+	li	r11, KVM_GUEST_MODE_GUEST
+	stb	r11, PACA_KVM_IN_GUEST(r13)
 
 	/* Remove LPAR shadow entries */
 
@@ -131,20 +129,27 @@
 
 	/* Enter guest */
 
-	mfspr	r0, SPRN_SPRG_SCRATCH0
+	ld	r0, (PACA_KVM_R0)(r13)
+	ld	r1, (PACA_KVM_R1)(r13)
+	ld	r2, (PACA_KVM_R2)(r13)
+	ld	r3, (PACA_KVM_R3)(r13)
+	ld	r4, (PACA_KVM_R4)(r13)
+	ld	r5, (PACA_KVM_R5)(r13)
+	ld	r6, (PACA_KVM_R6)(r13)
+	ld	r7, (PACA_KVM_R7)(r13)
+	ld	r8, (PACA_KVM_R8)(r13)
+	ld	r9, (PACA_KVM_R9)(r13)
+	ld	r10, (PACA_KVM_R10)(r13)
+	ld	r12, (PACA_KVM_R12)(r13)
 
-	ld	r9, (PACA_EXMC+EX_R9)(r13)
-	ld	r10, (PACA_EXMC+EX_R10)(r13)
-	ld	r12, (PACA_EXMC+EX_R12)(r13)
-
-	lwz	r11, (PACA_EXMC+EX_CCR)(r13)
+	lwz	r11, (PACA_KVM_CR)(r13)
 	mtcr	r11
 
-	ld	r11, (PACA_EXMC+EX_R3)(r13)
+	ld	r11, (PACA_KVM_XER)(r13)
 	mtxer	r11
 
-	ld	r11, (PACA_EXMC+EX_R11)(r13)
-	ld	r13, (PACA_EXMC+EX_R13)(r13)
+	ld	r11, (PACA_KVM_R11)(r13)
+	ld	r13, (PACA_KVM_R13)(r13)
 
 	RFI
 kvmppc_handler_trampoline_enter_end:
@@ -162,28 +167,54 @@
 
 	/* Register usage at this point:
 	 *
-	 * SPRG_SCRATCH0 = guest R13
-	 * R01           = host R1
-	 * R02           = host R2
-	 * R10           = guest PC
-	 * R11           = guest MSR
-	 * R12           = exit handler id
-	 * R13           = PACA
-	 * PACA.exmc.CCR  = guest CR
-	 * PACA.exmc.R9  = guest R1
-	 * PACA.exmc.R10 = guest R10
-	 * PACA.exmc.R11 = guest R11
-	 * PACA.exmc.R12 = guest R12
-	 * PACA.exmc.R13 = guest R2
+	 * SPRG_SCRATCH0     = guest R13
+	 * R12               = exit handler id
+	 * R13               = PACA
+	 * PACA.KVM.SCRATCH0 = guest R12
+	 * PACA.KVM.SCRATCH1 = guest CR
 	 *
 	 */
 
 	/* Save registers */
 
-	std	r0, (PACA_EXMC+EX_SRR0)(r13)
-	std	r9, (PACA_EXMC+EX_R3)(r13)
-	std	r10, (PACA_EXMC+EX_LR)(r13)
-	std	r11, (PACA_EXMC+EX_DAR)(r13)
+	std	r0, PACA_KVM_R0(r13)
+	std	r1, PACA_KVM_R1(r13)
+	std	r2, PACA_KVM_R2(r13)
+	std	r3, PACA_KVM_R3(r13)
+	std	r4, PACA_KVM_R4(r13)
+	std	r5, PACA_KVM_R5(r13)
+	std	r6, PACA_KVM_R6(r13)
+	std	r7, PACA_KVM_R7(r13)
+	std	r8, PACA_KVM_R8(r13)
+	std	r9, PACA_KVM_R9(r13)
+	std	r10, PACA_KVM_R10(r13)
+	std	r11, PACA_KVM_R11(r13)
+
+	/* Restore R1/R2 so we can handle faults */
+	ld	r1, PACA_KVM_HOST_R1(r13)
+	ld	r2, PACA_KVM_HOST_R2(r13)
+
+	/* Save guest PC and MSR in GPRs */
+	mfsrr0	r3
+	mfsrr1	r4
+
+	/* Get scratch'ed off registers */
+	mfspr	r9, SPRN_SPRG_SCRATCH0
+	std	r9, PACA_KVM_R13(r13)
+
+	ld	r8, PACA_KVM_SCRATCH0(r13)
+	std	r8, PACA_KVM_R12(r13)
+
+	lwz	r7, PACA_KVM_SCRATCH1(r13)
+	stw	r7, PACA_KVM_CR(r13)
+
+	/* Save more register state  */
+
+	mfxer	r6
+	stw	r6, PACA_KVM_XER(r13)
+
+	mfdar	r5
+	mfdsisr	r6
 
 	/*
 	 * In order for us to easily get the last instruction,
@@ -202,17 +233,28 @@
 
 ld_last_inst:
 	/* Save off the guest instruction we're at */
+
+	/* Set guest mode to 'jump over instruction' so if lwz faults
+	 * we'll just continue at the next IP. */
+	li	r9, KVM_GUEST_MODE_SKIP
+	stb	r9, PACA_KVM_IN_GUEST(r13)
+
 	/*    1) enable paging for data */
 	mfmsr	r9
 	ori	r11, r9, MSR_DR			/* Enable paging for data */
 	mtmsr	r11
 	/*    2) fetch the instruction */
-	lwz	r0, 0(r10)
+	li	r0, KVM_INST_FETCH_FAILED	/* In case lwz faults */
+	lwz	r0, 0(r3)
 	/*    3) disable paging again */
 	mtmsr	r9
 
 no_ld_last_inst:
 
+	/* Unset guest mode */
+	li	r9, KVM_GUEST_MODE_NONE
+	stb	r9, PACA_KVM_IN_GUEST(r13)
+
 	/* Restore bolted entries from the shadow and fix it along the way */
 
 	/* We don't store anything in entry 0, so we don't need to take care of it */
@@ -233,29 +275,27 @@
 
 slb_do_exit:
 
-	/* Restore registers */
-
-	ld	r11, (PACA_EXMC+EX_DAR)(r13)
-	ld	r10, (PACA_EXMC+EX_LR)(r13)
-	ld	r9, (PACA_EXMC+EX_R3)(r13)
-
-	/* Save last inst */
-	stw	r0, (PACA_EXMC+EX_LR)(r13)
-
-	/* Save DAR and DSISR before going to paged mode */
-	mfdar	r0
-	std	r0, (PACA_EXMC+EX_DAR)(r13)
-	mfdsisr	r0
-	stw	r0, (PACA_EXMC+EX_DSISR)(r13)
+	/* Register usage at this point:
+	 *
+	 * R0         = guest last inst
+	 * R1         = host R1
+	 * R2         = host R2
+	 * R3         = guest PC
+	 * R4         = guest MSR
+	 * R5         = guest DAR
+	 * R6         = guest DSISR
+	 * R12        = exit handler id
+	 * R13        = PACA
+	 * PACA.KVM.* = guest *
+	 *
+	 */
 
 	/* RFI into the highmem handler */
-	mfmsr	r0
-	ori	r0, r0, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
-	mtsrr1	r0
-	ld	r0, PACASAVEDMSR(r13)		/* Highmem handler address */
-	mtsrr0	r0
-
-	mfspr	r0, SPRN_SPRG_SCRATCH0
+	mfmsr	r7
+	ori	r7, r7, MSR_IR|MSR_DR|MSR_RI	/* Enable paging */
+	mtsrr1	r7
+	ld	r8, PACA_KVM_VMHANDLER(r13)	/* Highmem handler address */
+	mtsrr0	r8
 
 	RFI
 kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 06f5a9e..4d686cc 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -69,10 +69,10 @@
 
 	for (i = 0; i < 32; i += 4) {
 		printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
-		       vcpu->arch.gpr[i],
-		       vcpu->arch.gpr[i+1],
-		       vcpu->arch.gpr[i+2],
-		       vcpu->arch.gpr[i+3]);
+		       kvmppc_get_gpr(vcpu, i),
+		       kvmppc_get_gpr(vcpu, i+1),
+		       kvmppc_get_gpr(vcpu, i+2),
+		       kvmppc_get_gpr(vcpu, i+3));
 	}
 }
 
@@ -82,8 +82,32 @@
 	set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+                                        ulong dear_flags, ulong esr_flags)
 {
+	vcpu->arch.queued_dear = dear_flags;
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+}
+
+static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+                                           ulong dear_flags, ulong esr_flags)
+{
+	vcpu->arch.queued_dear = dear_flags;
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+}
+
+static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+                                           ulong esr_flags)
+{
+	vcpu->arch.queued_esr = esr_flags;
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+}
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+	vcpu->arch.queued_esr = esr_flags;
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 }
 
@@ -97,6 +121,11 @@
 	return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
@@ -109,14 +138,19 @@
 {
 	int allowed = 0;
 	ulong msr_mask;
+	bool update_esr = false, update_dear = false;
 
 	switch (priority) {
-	case BOOKE_IRQPRIO_PROGRAM:
 	case BOOKE_IRQPRIO_DTLB_MISS:
+	case BOOKE_IRQPRIO_DATA_STORAGE:
+		update_dear = true;
+		/* fall through */
+	case BOOKE_IRQPRIO_INST_STORAGE:
+	case BOOKE_IRQPRIO_PROGRAM:
+		update_esr = true;
+		/* fall through */
 	case BOOKE_IRQPRIO_ITLB_MISS:
 	case BOOKE_IRQPRIO_SYSCALL:
-	case BOOKE_IRQPRIO_DATA_STORAGE:
-	case BOOKE_IRQPRIO_INST_STORAGE:
 	case BOOKE_IRQPRIO_FP_UNAVAIL:
 	case BOOKE_IRQPRIO_SPE_UNAVAIL:
 	case BOOKE_IRQPRIO_SPE_FP_DATA:
@@ -151,6 +185,10 @@
 		vcpu->arch.srr0 = vcpu->arch.pc;
 		vcpu->arch.srr1 = vcpu->arch.msr;
 		vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+		if (update_esr == true)
+			vcpu->arch.esr = vcpu->arch.queued_esr;
+		if (update_dear == true)
+			vcpu->arch.dear = vcpu->arch.queued_dear;
 		kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
 
 		clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -223,8 +261,7 @@
 		if (vcpu->arch.msr & MSR_PR) {
 			/* Program traps generated by user-level software must be handled
 			 * by the guest kernel. */
-			vcpu->arch.esr = vcpu->arch.fault_esr;
-			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+			kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
 			r = RESUME_GUEST;
 			kvmppc_account_exit(vcpu, USR_PR_INST);
 			break;
@@ -280,16 +317,14 @@
 		break;
 
 	case BOOKE_INTERRUPT_DATA_STORAGE:
-		vcpu->arch.dear = vcpu->arch.fault_dear;
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+		kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
+		                               vcpu->arch.fault_esr);
 		kvmppc_account_exit(vcpu, DSI_EXITS);
 		r = RESUME_GUEST;
 		break;
 
 	case BOOKE_INTERRUPT_INST_STORAGE:
-		vcpu->arch.esr = vcpu->arch.fault_esr;
-		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+		kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
 		kvmppc_account_exit(vcpu, ISI_EXITS);
 		r = RESUME_GUEST;
 		break;
@@ -310,9 +345,9 @@
 		gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
 		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
-			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
-			vcpu->arch.dear = vcpu->arch.fault_dear;
-			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_core_queue_dtlb_miss(vcpu,
+			                            vcpu->arch.fault_dear,
+			                            vcpu->arch.fault_esr);
 			kvmppc_mmu_dtlb_miss(vcpu);
 			kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
 			r = RESUME_GUEST;
@@ -426,7 +461,7 @@
 {
 	vcpu->arch.pc = 0;
 	vcpu->arch.msr = 0;
-	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+	kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
 
 	vcpu->arch.shadow_pid = 1;
 
@@ -444,10 +479,10 @@
 	int i;
 
 	regs->pc = vcpu->arch.pc;
-	regs->cr = vcpu->arch.cr;
+	regs->cr = kvmppc_get_cr(vcpu);
 	regs->ctr = vcpu->arch.ctr;
 	regs->lr = vcpu->arch.lr;
-	regs->xer = vcpu->arch.xer;
+	regs->xer = kvmppc_get_xer(vcpu);
 	regs->msr = vcpu->arch.msr;
 	regs->srr0 = vcpu->arch.srr0;
 	regs->srr1 = vcpu->arch.srr1;
@@ -461,7 +496,7 @@
 	regs->sprg7 = vcpu->arch.sprg6;
 
 	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-		regs->gpr[i] = vcpu->arch.gpr[i];
+		regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
 	return 0;
 }
@@ -471,10 +506,10 @@
 	int i;
 
 	vcpu->arch.pc = regs->pc;
-	vcpu->arch.cr = regs->cr;
+	kvmppc_set_cr(vcpu, regs->cr);
 	vcpu->arch.ctr = regs->ctr;
 	vcpu->arch.lr = regs->lr;
-	vcpu->arch.xer = regs->xer;
+	kvmppc_set_xer(vcpu, regs->xer);
 	kvmppc_set_msr(vcpu, regs->msr);
 	vcpu->arch.srr0 = regs->srr0;
 	vcpu->arch.srr1 = regs->srr1;
@@ -486,8 +521,8 @@
 	vcpu->arch.sprg6 = regs->sprg5;
 	vcpu->arch.sprg7 = regs->sprg6;
 
-	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-		vcpu->arch.gpr[i] = regs->gpr[i];
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
 	return 0;
 }
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index aebc65e..cbc790e 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -62,20 +62,20 @@
 
 		case OP_31_XOP_MFMSR:
 			rt = get_rt(inst);
-			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
 			kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
 			break;
 
 		case OP_31_XOP_MTMSR:
 			rs = get_rs(inst);
 			kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
-			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
 			break;
 
 		case OP_31_XOP_WRTEE:
 			rs = get_rs(inst);
 			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-							 | (vcpu->arch.gpr[rs] & MSR_EE);
+					| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
 			kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
 			break;
 
@@ -101,22 +101,23 @@
 int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
 	int emulated = EMULATE_DONE;
+	ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
 	switch (sprn) {
 	case SPRN_DEAR:
-		vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.dear = spr_val; break;
 	case SPRN_ESR:
-		vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.esr = spr_val; break;
 	case SPRN_DBCR0:
-		vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.dbcr0 = spr_val; break;
 	case SPRN_DBCR1:
-		vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.dbcr1 = spr_val; break;
 	case SPRN_DBSR:
-		vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+		vcpu->arch.dbsr &= ~spr_val; break;
 	case SPRN_TSR:
-		vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+		vcpu->arch.tsr &= ~spr_val; break;
 	case SPRN_TCR:
-		vcpu->arch.tcr = vcpu->arch.gpr[rs];
+		vcpu->arch.tcr = spr_val;
 		kvmppc_emulate_dec(vcpu);
 		break;
 
@@ -124,64 +125,64 @@
 	 * loaded into the real SPRGs when resuming the
 	 * guest. */
 	case SPRN_SPRG4:
-		vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.sprg4 = spr_val; break;
 	case SPRN_SPRG5:
-		vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.sprg5 = spr_val; break;
 	case SPRN_SPRG6:
-		vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.sprg6 = spr_val; break;
 	case SPRN_SPRG7:
-		vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.sprg7 = spr_val; break;
 
 	case SPRN_IVPR:
-		vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+		vcpu->arch.ivpr = spr_val;
 		break;
 	case SPRN_IVOR0:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
 		break;
 	case SPRN_IVOR1:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
 		break;
 	case SPRN_IVOR2:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
 		break;
 	case SPRN_IVOR3:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
 		break;
 	case SPRN_IVOR4:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
 		break;
 	case SPRN_IVOR5:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
 		break;
 	case SPRN_IVOR6:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
 		break;
 	case SPRN_IVOR7:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
 		break;
 	case SPRN_IVOR8:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
 		break;
 	case SPRN_IVOR9:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
 		break;
 	case SPRN_IVOR10:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
 		break;
 	case SPRN_IVOR11:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
 		break;
 	case SPRN_IVOR12:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
 		break;
 	case SPRN_IVOR13:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
 		break;
 	case SPRN_IVOR14:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
 		break;
 	case SPRN_IVOR15:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
 		break;
 
 	default:
@@ -197,65 +198,65 @@
 
 	switch (sprn) {
 	case SPRN_IVPR:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
 	case SPRN_DEAR:
-		vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
 	case SPRN_ESR:
-		vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
 	case SPRN_DBCR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
 	case SPRN_DBCR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
 	case SPRN_DBSR:
-		vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
 
 	case SPRN_IVOR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
 		break;
 	case SPRN_IVOR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
 		break;
 	case SPRN_IVOR2:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
 		break;
 	case SPRN_IVOR3:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
 		break;
 	case SPRN_IVOR4:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
 		break;
 	case SPRN_IVOR5:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
 		break;
 	case SPRN_IVOR6:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
 		break;
 	case SPRN_IVOR7:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
 		break;
 	case SPRN_IVOR8:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
 		break;
 	case SPRN_IVOR9:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
 		break;
 	case SPRN_IVOR10:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
 		break;
 	case SPRN_IVOR11:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
 		break;
 	case SPRN_IVOR12:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
 		break;
 	case SPRN_IVOR13:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
 		break;
 	case SPRN_IVOR14:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
 		break;
 	case SPRN_IVOR15:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
 		break;
 
 	default:
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 64949ee..efa1198 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -60,6 +60,12 @@
 
 	kvmppc_e500_tlb_setup(vcpu_e500);
 
+	/* Registers init */
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+	/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
+	vcpu->vcpu_id = 0;
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index be95b8d..8e3edfb 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -74,54 +74,59 @@
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int emulated = EMULATE_DONE;
+	ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
 	switch (sprn) {
 	case SPRN_PID:
 		vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
-			vcpu->arch.pid = vcpu->arch.gpr[rs];
+			vcpu->arch.pid = spr_val;
 		break;
 	case SPRN_PID1:
-		vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->pid[1] = spr_val; break;
 	case SPRN_PID2:
-		vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->pid[2] = spr_val; break;
 	case SPRN_MAS0:
-		vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas0 = spr_val; break;
 	case SPRN_MAS1:
-		vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas1 = spr_val; break;
 	case SPRN_MAS2:
-		vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas2 = spr_val; break;
 	case SPRN_MAS3:
-		vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas3 = spr_val; break;
 	case SPRN_MAS4:
-		vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas4 = spr_val; break;
 	case SPRN_MAS6:
-		vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas6 = spr_val; break;
 	case SPRN_MAS7:
-		vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->mas7 = spr_val; break;
+	case SPRN_L1CSR0:
+		vcpu_e500->l1csr0 = spr_val;
+		vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
+		break;
 	case SPRN_L1CSR1:
-		vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->l1csr1 = spr_val; break;
 	case SPRN_HID0:
-		vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->hid0 = spr_val; break;
 	case SPRN_HID1:
-		vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+		vcpu_e500->hid1 = spr_val; break;
 
 	case SPRN_MMUCSR0:
 		emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
-				vcpu->arch.gpr[rs]);
+				spr_val);
 		break;
 
 	/* extra exceptions */
 	case SPRN_IVOR32:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
 		break;
 	case SPRN_IVOR33:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
 		break;
 	case SPRN_IVOR34:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
 		break;
 	case SPRN_IVOR35:
-		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+		vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
 		break;
 
 	default:
@@ -138,63 +143,57 @@
 
 	switch (sprn) {
 	case SPRN_PID:
-		vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
 	case SPRN_PID1:
-		vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
 	case SPRN_PID2:
-		vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
 	case SPRN_MAS0:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
 	case SPRN_MAS1:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
 	case SPRN_MAS2:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
 	case SPRN_MAS3:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
 	case SPRN_MAS4:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
 	case SPRN_MAS6:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
 	case SPRN_MAS7:
-		vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
 
 	case SPRN_TLB0CFG:
-		vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
-		vcpu->arch.gpr[rt] &= ~0xfffUL;
-		vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
-		break;
-
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
 	case SPRN_TLB1CFG:
-		vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
-		vcpu->arch.gpr[rt] &= ~0xfffUL;
-		vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
-		break;
-
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+	case SPRN_L1CSR0:
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
 	case SPRN_L1CSR1:
-		vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
 	case SPRN_HID0:
-		vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
 	case SPRN_HID1:
-		vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
 
 	case SPRN_MMUCSR0:
-		vcpu->arch.gpr[rt] = 0; break;
+		kvmppc_set_gpr(vcpu, rt, 0); break;
 
 	case SPRN_MMUCFG:
-		vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+		kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
 
 	/* extra exceptions */
 	case SPRN_IVOR32:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
 		break;
 	case SPRN_IVOR33:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
 		break;
 	case SPRN_IVOR34:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
 		break;
 	case SPRN_IVOR35:
-		vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
 		break;
 	default:
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index fb1e1dc..0d772e6 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -417,7 +417,7 @@
 	int esel, tlbsel;
 	gva_t ea;
 
-	ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+	ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
 
 	ia = (ea >> 2) & 0x1;
 
@@ -470,7 +470,7 @@
 	struct tlbe *gtlbe = NULL;
 	gva_t ea;
 
-	ea = vcpu->arch.gpr[rb];
+	ea = kvmppc_get_gpr(vcpu, rb);
 
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -728,6 +728,12 @@
 	if (vcpu_e500->shadow_pages[1] == NULL)
 		goto err_out_page0;
 
+	/* Init TLB configuration register */
+	vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+	vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
+	vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+	vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
+
 	return 0;
 
 err_out_page0:
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4a9ac66..cb72a65 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -83,6 +83,9 @@
 
 	pr_debug("mtDEC: %x\n", vcpu->arch.dec);
 #ifdef CONFIG_PPC64
+	/* mtdec lowers the interrupt line when positive. */
+	kvmppc_core_dequeue_dec(vcpu);
+
 	/* POWER4+ triggers a dec interrupt if the value is < 0 */
 	if (vcpu->arch.dec & 0x80000000) {
 		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@@ -140,14 +143,18 @@
 
 	pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
 
+	/* Try again next time */
+	if (inst == KVM_INST_FETCH_FAILED)
+		return EMULATE_DONE;
+
 	switch (get_op(inst)) {
 	case OP_TRAP:
 #ifdef CONFIG_PPC64
 	case OP_TRAP_64:
+		kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
 #else
-		vcpu->arch.esr |= ESR_PTR;
+		kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
 #endif
-		kvmppc_core_queue_program(vcpu);
 		advance = 0;
 		break;
 
@@ -167,14 +174,14 @@
 		case OP_31_XOP_STWX:
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               4, 1);
 			break;
 
 		case OP_31_XOP_STBX:
 			rs = get_rs(inst);
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               1, 1);
 			break;
 
@@ -183,14 +190,14 @@
 			ra = get_ra(inst);
 			rb = get_rb(inst);
 
-			ea = vcpu->arch.gpr[rb];
+			ea = kvmppc_get_gpr(vcpu, rb);
 			if (ra)
-				ea += vcpu->arch.gpr[ra];
+				ea += kvmppc_get_gpr(vcpu, ra);
 
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               1, 1);
-			vcpu->arch.gpr[rs] = ea;
+			kvmppc_set_gpr(vcpu, rs, ea);
 			break;
 
 		case OP_31_XOP_LHZX:
@@ -203,12 +210,12 @@
 			ra = get_ra(inst);
 			rb = get_rb(inst);
 
-			ea = vcpu->arch.gpr[rb];
+			ea = kvmppc_get_gpr(vcpu, rb);
 			if (ra)
-				ea += vcpu->arch.gpr[ra];
+				ea += kvmppc_get_gpr(vcpu, ra);
 
 			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-			vcpu->arch.gpr[ra] = ea;
+			kvmppc_set_gpr(vcpu, ra, ea);
 			break;
 
 		case OP_31_XOP_MFSPR:
@@ -217,47 +224,49 @@
 
 			switch (sprn) {
 			case SPRN_SRR0:
-				vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
 			case SPRN_SRR1:
-				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
 			case SPRN_PVR:
-				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
 			case SPRN_PIR:
-				vcpu->arch.gpr[rt] = vcpu->vcpu_id; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
 			case SPRN_MSSSR0:
-				vcpu->arch.gpr[rt] = 0; break;
+				kvmppc_set_gpr(vcpu, rt, 0); break;
 
 			/* Note: mftb and TBRL/TBWL are user-accessible, so
 			 * the guest can always access the real TB anyways.
 			 * In fact, we probably will never see these traps. */
 			case SPRN_TBWL:
-				vcpu->arch.gpr[rt] = get_tb() >> 32; break;
+				kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
 			case SPRN_TBWU:
-				vcpu->arch.gpr[rt] = get_tb(); break;
+				kvmppc_set_gpr(vcpu, rt, get_tb()); break;
 
 			case SPRN_SPRG0:
-				vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
 			case SPRN_SPRG1:
-				vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
 			case SPRN_SPRG2:
-				vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
 			case SPRN_SPRG3:
-				vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
 			/* Note: SPRG4-7 are user-readable, so we don't get
 			 * a trap. */
 
 			case SPRN_DEC:
 			{
 				u64 jd = get_tb() - vcpu->arch.dec_jiffies;
-				vcpu->arch.gpr[rt] = vcpu->arch.dec - jd;
-				pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]);
+				kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
+				pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
+					 vcpu->arch.dec, jd,
+					 kvmppc_get_gpr(vcpu, rt));
 				break;
 			}
 			default:
 				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
 				if (emulated == EMULATE_FAIL) {
 					printk("mfspr: unknown spr %x\n", sprn);
-					vcpu->arch.gpr[rt] = 0;
+					kvmppc_set_gpr(vcpu, rt, 0);
 				}
 				break;
 			}
@@ -269,7 +278,7 @@
 			rb = get_rb(inst);
 
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               2, 1);
 			break;
 
@@ -278,14 +287,14 @@
 			ra = get_ra(inst);
 			rb = get_rb(inst);
 
-			ea = vcpu->arch.gpr[rb];
+			ea = kvmppc_get_gpr(vcpu, rb);
 			if (ra)
-				ea += vcpu->arch.gpr[ra];
+				ea += kvmppc_get_gpr(vcpu, ra);
 
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               2, 1);
-			vcpu->arch.gpr[ra] = ea;
+			kvmppc_set_gpr(vcpu, ra, ea);
 			break;
 
 		case OP_31_XOP_MTSPR:
@@ -293,9 +302,9 @@
 			rs = get_rs(inst);
 			switch (sprn) {
 			case SPRN_SRR0:
-				vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
 			case SPRN_SRR1:
-				vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
 
 			/* XXX We need to context-switch the timebase for
 			 * watchdog and FIT. */
@@ -305,18 +314,18 @@
 			case SPRN_MSSSR0: break;
 
 			case SPRN_DEC:
-				vcpu->arch.dec = vcpu->arch.gpr[rs];
+				vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
 				kvmppc_emulate_dec(vcpu);
 				break;
 
 			case SPRN_SPRG0:
-				vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
 			case SPRN_SPRG1:
-				vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
 			case SPRN_SPRG2:
-				vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
 			case SPRN_SPRG3:
-				vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+				vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
 
 			default:
 				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
@@ -348,7 +357,7 @@
 			rb = get_rb(inst);
 
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               4, 0);
 			break;
 
@@ -363,7 +372,7 @@
 			rb = get_rb(inst);
 
 			emulated = kvmppc_handle_store(run, vcpu,
-			                               vcpu->arch.gpr[rs],
+						       kvmppc_get_gpr(vcpu, rs),
 			                               2, 0);
 			break;
 
@@ -382,7 +391,7 @@
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	case OP_LBZ:
@@ -394,35 +403,39 @@
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	case OP_STW:
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               4, 1);
 		break;
 
 	case OP_STWU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               4, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	case OP_STB:
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               1, 1);
 		break;
 
 	case OP_STBU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               1, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	case OP_LHZ:
@@ -434,21 +447,23 @@
 		ra = get_ra(inst);
 		rt = get_rt(inst);
 		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	case OP_STH:
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               2, 1);
 		break;
 
 	case OP_STHU:
 		ra = get_ra(inst);
 		rs = get_rs(inst);
-		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		emulated = kvmppc_handle_store(run, vcpu,
+					       kvmppc_get_gpr(vcpu, rs),
 		                               2, 1);
-		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
 		break;
 
 	default:
@@ -461,6 +476,7 @@
 			advance = 0;
 			printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
 			       "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+			kvmppc_core_queue_program(vcpu, 0);
 		}
 	}
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f06cf93..51aedd7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -137,6 +137,7 @@
 {
 	kvmppc_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
 	kfree(kvm);
 }
 
@@ -165,14 +166,24 @@
 	return -EINVAL;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               struct kvm_memory_slot old,
-                               int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                   struct kvm_memory_slot *memslot,
+                                   struct kvm_memory_slot old,
+                                   struct kvm_userspace_memory_region *mem,
+                                   int user_alloc)
 {
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+               struct kvm_userspace_memory_region *mem,
+               struct kvm_memory_slot old,
+               int user_alloc)
+{
+       return;
+}
+
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 }
@@ -260,34 +271,35 @@
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
-	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
-	*gpr = run->dcr.data;
+	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
 }
 
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
-	ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	ulong gpr;
 
-	if (run->mmio.len > sizeof(*gpr)) {
+	if (run->mmio.len > sizeof(gpr)) {
 		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
 		return;
 	}
 
 	if (vcpu->arch.mmio_is_bigendian) {
 		switch (run->mmio.len) {
-		case 4: *gpr = *(u32 *)run->mmio.data; break;
-		case 2: *gpr = *(u16 *)run->mmio.data; break;
-		case 1: *gpr = *(u8 *)run->mmio.data; break;
+		case 4: gpr = *(u32 *)run->mmio.data; break;
+		case 2: gpr = *(u16 *)run->mmio.data; break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
 		}
 	} else {
 		/* Convert BE data from userland back to LE. */
 		switch (run->mmio.len) {
-		case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
-		case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
-		case 1: *gpr = *(u8 *)run->mmio.data; break;
+		case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
+		case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+		case 1: gpr = *(u8 *)run->mmio.data; break;
 		}
 	}
+
+	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 }
 
 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 341aff2..cd128b0 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -288,46 +288,30 @@
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = HYPFS_MAGIC;
 	sb->s_op = &hypfs_s_ops;
-	if (hypfs_parse_options(data, sb)) {
-		rc = -EINVAL;
-		goto err_alloc;
-	}
+	if (hypfs_parse_options(data, sb))
+		return -EINVAL;
 	root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
-	if (!root_inode) {
-		rc = -ENOMEM;
-		goto err_alloc;
-	}
+	if (!root_inode)
+		return -ENOMEM;
 	root_inode->i_op = &simple_dir_inode_operations;
 	root_inode->i_fop = &simple_dir_operations;
-	root_dentry = d_alloc_root(root_inode);
+	sb->s_root = root_dentry = d_alloc_root(root_inode);
 	if (!root_dentry) {
 		iput(root_inode);
-		rc = -ENOMEM;
-		goto err_alloc;
+		return -ENOMEM;
 	}
 	if (MACHINE_IS_VM)
 		rc = hypfs_vm_create_files(sb, root_dentry);
 	else
 		rc = hypfs_diag_create_files(sb, root_dentry);
 	if (rc)
-		goto err_tree;
+		return rc;
 	sbi->update_file = hypfs_create_update_file(sb, root_dentry);
-	if (IS_ERR(sbi->update_file)) {
-		rc = PTR_ERR(sbi->update_file);
-		goto err_tree;
-	}
+	if (IS_ERR(sbi->update_file))
+		return PTR_ERR(sbi->update_file);
 	hypfs_update_update(sb);
-	sb->s_root = root_dentry;
 	pr_info("Hypervisor filesystem mounted\n");
 	return 0;
-
-err_tree:
-	hypfs_delete_tree(root_dentry);
-	d_genocide(root_dentry);
-	dput(root_dentry);
-err_alloc:
-	kfree(sbi);
-	return rc;
 }
 
 static int hypfs_get_super(struct file_system_type *fst, int flags,
@@ -340,12 +324,12 @@
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
 
-	if (sb->s_root) {
+	if (sb->s_root)
 		hypfs_delete_tree(sb->s_root);
+	if (sb_info->update_file)
 		hypfs_remove(sb_info->update_file);
-		kfree(sb->s_fs_info);
-		sb->s_fs_info = NULL;
-	}
+	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
 	kill_litter_super(sb);
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3fa0a10..4929286 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -242,6 +242,7 @@
 	kvm_free_physmem(kvm);
 	free_page((unsigned long)(kvm->arch.sca));
 	debug_unregister(kvm->arch.dbf);
+	cleanup_srcu_struct(&kvm->srcu);
 	kfree(kvm);
 }
 
@@ -690,14 +691,12 @@
 }
 
 /* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
-				struct kvm_memory_slot old,
-				int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_memory_slot old,
+				   struct kvm_userspace_memory_region *mem,
+				   int user_alloc)
 {
-	int i;
-	struct kvm_vcpu *vcpu;
-
 	/* A few sanity checks. We can have exactly one memory slot which has
 	   to start at guest virtual zero and which has to be located at a
 	   page boundary in userland and which has to end at a page boundary.
@@ -720,14 +719,23 @@
 	if (!user_alloc)
 		return -EINVAL;
 
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+
 	/* request update of sie control block for all available vcpus */
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			continue;
 		kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
 	}
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 06cce82..60f09ab 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -67,10 +67,14 @@
 
 static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
 {
+	int idx;
 	struct kvm_memory_slot *mem;
+	struct kvm_memslots *memslots;
 
-	down_read(&vcpu->kvm->slots_lock);
-	mem = &vcpu->kvm->memslots[0];
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	memslots = rcu_dereference(vcpu->kvm->memslots);
+
+	mem = &memslots->memslots[0];
 
 	vcpu->arch.sie_block->gmsor = mem->userspace_addr;
 	vcpu->arch.sie_block->gmslm =
@@ -78,7 +82,7 @@
 		(mem->npages << PAGE_SHIFT) +
 		VIRTIODESCSPACE - 1ul;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 /* implemented in priv.c */
diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig
index 99a1f19..6a8d078 100644
--- a/arch/sparc/configs/sparc32_defconfig
+++ b/arch/sparc/configs/sparc32_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.33-rc2
-# Mon Jan 11 23:20:31 2010
+# Linux kernel version: 2.6.33
+# Wed Mar  3 02:52:23 2010
 #
 # CONFIG_64BIT is not set
 CONFIG_SPARC=y
@@ -9,6 +9,8 @@
 # CONFIG_SPARC64 is not set
 CONFIG_ARCH_DEFCONFIG="arch/sparc/configs/sparc32_defconfig"
 CONFIG_BITS=32
+CONFIG_GENERIC_TIME=y
+CONFIG_ARCH_USES_GETTIMEOFFSET=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_MMU=y
 CONFIG_HIGHMEM=y
@@ -48,11 +50,6 @@
 # CONFIG_TREE_RCU_TRACE is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_GROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-CONFIG_RT_GROUP_SCHED=y
-CONFIG_USER_SCHED=y
-# CONFIG_CGROUP_SCHED is not set
 # CONFIG_CGROUPS is not set
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_SYSFS_DEPRECATED_V2=y
@@ -68,6 +65,7 @@
 CONFIG_RD_GZIP=y
 CONFIG_RD_BZIP2=y
 CONFIG_RD_LZMA=y
+CONFIG_RD_LZO=y
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -211,7 +209,6 @@
 CONFIG_PCI=y
 CONFIG_PCI_SYSCALL=y
 # CONFIG_ARCH_SUPPORTS_MSI is not set
-CONFIG_PCI_LEGACY=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 # CONFIG_PCI_IOV is not set
@@ -232,7 +229,6 @@
 # Networking options
 #
 CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
@@ -379,11 +375,13 @@
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_TI_DAC7512 is not set
 # CONFIG_C2PORT is not set
 
 #
 # EEPROM support
 #
+# CONFIG_EEPROM_AT25 is not set
 # CONFIG_EEPROM_93CX6 is not set
 # CONFIG_CB710_CORE is not set
 CONFIG_HAVE_IDE=y
@@ -507,7 +505,9 @@
 # CONFIG_SUNGEM is not set
 # CONFIG_CASSINI is not set
 # CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_ENC28J60 is not set
 # CONFIG_ETHOC is not set
+# CONFIG_GRETH is not set
 # CONFIG_DNET is not set
 # CONFIG_NET_TULIP is not set
 # CONFIG_HP100 is not set
@@ -521,6 +521,7 @@
 # CONFIG_NET_PCI is not set
 # CONFIG_B44 is not set
 # CONFIG_KS8842 is not set
+# CONFIG_KS8851 is not set
 # CONFIG_KS8851_MLL is not set
 # CONFIG_ATL2 is not set
 CONFIG_NETDEV_1000=y
@@ -563,6 +564,7 @@
 # CONFIG_MLX4_CORE is not set
 # CONFIG_TEHUTI is not set
 # CONFIG_BNX2X is not set
+# CONFIG_QLCNIC is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
 # CONFIG_BE2NET is not set
@@ -665,6 +667,7 @@
 #
 # Non-8250 serial port support
 #
+# CONFIG_SERIAL_MAX3100 is not set
 CONFIG_SERIAL_SUNCORE=y
 CONFIG_SERIAL_SUNZILOG=y
 CONFIG_SERIAL_SUNZILOG_CONSOLE=y
@@ -689,7 +692,23 @@
 # CONFIG_TCG_TPM is not set
 CONFIG_DEVPORT=y
 # CONFIG_I2C is not set
-# CONFIG_SPI is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=m
+CONFIG_SPI_XILINX=m
+CONFIG_SPI_XILINX_PLTFM=m
+# CONFIG_SPI_DESIGNWARE is not set
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
 
 #
 # PPS support
@@ -706,10 +725,13 @@
 #
 # Native drivers
 #
+# CONFIG_SENSORS_ADCXX is not set
 # CONFIG_SENSORS_I5K_AMB is not set
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71882FG is not set
 # CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM70 is not set
+# CONFIG_SENSORS_MAX1111 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
 # CONFIG_SENSORS_SIS5595 is not set
@@ -720,6 +742,7 @@
 # CONFIG_SENSORS_VT8231 is not set
 # CONFIG_SENSORS_W83627HF is not set
 # CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_SENSORS_LIS3_SPI is not set
 # CONFIG_THERMAL is not set
 # CONFIG_WATCHDOG is not set
 CONFIG_SSB_POSSIBLE=y
@@ -736,6 +759,8 @@
 # CONFIG_MFD_SM501 is not set
 # CONFIG_HTC_PASIC3 is not set
 # CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_MC13783 is not set
+# CONFIG_AB4500_CORE is not set
 # CONFIG_REGULATOR is not set
 # CONFIG_MEDIA_SUPPORT is not set
 
@@ -743,6 +768,7 @@
 # Graphics support
 #
 CONFIG_VGA_ARB=y
+CONFIG_VGA_ARB_MAX_GPUS=16
 # CONFIG_VGASTATE is not set
 # CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
@@ -808,6 +834,14 @@
 #
 # SPI RTC drivers
 #
+# CONFIG_RTC_DRV_M41T94 is not set
+# CONFIG_RTC_DRV_DS1305 is not set
+# CONFIG_RTC_DRV_DS1390 is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_R9701 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_DS3234 is not set
+# CONFIG_RTC_DRV_PCF2123 is not set
 
 #
 # Platform RTC drivers
@@ -1180,9 +1214,11 @@
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_DECOMPRESS=y
 CONFIG_DECOMPRESS_GZIP=y
 CONFIG_DECOMPRESS_BZIP2=y
 CONFIG_DECOMPRESS_LZMA=y
+CONFIG_DECOMPRESS_LZO=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 41c5a56..56e3163 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.33-rc2
-# Wed Jan 20 16:31:47 2010
+# Linux kernel version: 2.6.33
+# Wed Mar  3 02:54:29 2010
 #
 CONFIG_64BIT=y
 CONFIG_SPARC=y
@@ -55,14 +55,10 @@
 # CONFIG_RCU_TRACE is not set
 CONFIG_RCU_FANOUT=64
 # CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_RCU_FAST_NO_HZ is not set
 # CONFIG_TREE_RCU_TRACE is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=18
-CONFIG_GROUP_SCHED=y
-CONFIG_FAIR_GROUP_SCHED=y
-CONFIG_RT_GROUP_SCHED=y
-CONFIG_USER_SCHED=y
-# CONFIG_CGROUP_SCHED is not set
 # CONFIG_CGROUPS is not set
 # CONFIG_SYSFS_DEPRECATED_V2 is not set
 CONFIG_RELAY=y
@@ -77,6 +73,7 @@
 CONFIG_RD_GZIP=y
 CONFIG_RD_BZIP2=y
 CONFIG_RD_LZMA=y
+CONFIG_RD_LZO=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 CONFIG_ANON_INODES=y
@@ -105,7 +102,6 @@
 # Kernel Performance Events And Counters
 #
 CONFIG_PERF_EVENTS=y
-CONFIG_EVENT_PROFILE=y
 CONFIG_PERF_COUNTERS=y
 # CONFIG_DEBUG_PERF_USE_VMALLOC is not set
 CONFIG_VM_EVENT_COUNTERS=y
@@ -266,7 +262,6 @@
 CONFIG_PCI_SYSCALL=y
 CONFIG_ARCH_SUPPORTS_MSI=y
 CONFIG_PCI_MSI=y
-# CONFIG_PCI_LEGACY is not set
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 # CONFIG_PCI_IOV is not set
@@ -290,7 +285,6 @@
 # Networking options
 #
 CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
 CONFIG_XFRM=y
 CONFIG_XFRM_USER=m
@@ -425,10 +419,6 @@
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
-
-#
-# DRBD disabled because PROC_FS, INET or CONNECTOR not selected
-#
 # CONFIG_BLK_DEV_DRBD is not set
 CONFIG_BLK_DEV_NBD=m
 # CONFIG_BLK_DEV_SX8 is not set
@@ -677,6 +667,7 @@
 CONFIG_SUNVNET=m
 # CONFIG_NET_VENDOR_3COM is not set
 # CONFIG_ETHOC is not set
+# CONFIG_GRETH is not set
 # CONFIG_DNET is not set
 # CONFIG_NET_TULIP is not set
 # CONFIG_HP100 is not set
@@ -691,6 +682,7 @@
 # CONFIG_PCNET32 is not set
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_KSZ884X_PCI is not set
 # CONFIG_B44 is not set
 # CONFIG_FORCEDETH is not set
 # CONFIG_E100 is not set
@@ -741,6 +733,7 @@
 # CONFIG_CHELSIO_T3 is not set
 # CONFIG_ENIC is not set
 # CONFIG_IXGBE is not set
+# CONFIG_IXGBEVF is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
 # CONFIG_VXGE is not set
@@ -751,6 +744,7 @@
 # CONFIG_MLX4_CORE is not set
 # CONFIG_TEHUTI is not set
 # CONFIG_BNX2X is not set
+# CONFIG_QLCNIC is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
 # CONFIG_BE2NET is not set
@@ -1028,6 +1022,7 @@
 # CONFIG_SENSORS_SMSC47M192 is not set
 # CONFIG_SENSORS_SMSC47B397 is not set
 # CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_AMC6821 is not set
 # CONFIG_SENSORS_THMC50 is not set
 # CONFIG_SENSORS_TMP401 is not set
 # CONFIG_SENSORS_TMP421 is not set
@@ -1076,6 +1071,7 @@
 # Graphics support
 #
 CONFIG_VGA_ARB=y
+CONFIG_VGA_ARB_MAX_GPUS=16
 # CONFIG_DRM is not set
 # CONFIG_VGASTATE is not set
 # CONFIG_VIDEO_OUTPUT_CONTROL is not set
@@ -1279,6 +1275,7 @@
 # CONFIG_SND_YMFPCI is not set
 CONFIG_SND_USB=y
 # CONFIG_SND_USB_AUDIO is not set
+# CONFIG_SND_USB_UA101 is not set
 # CONFIG_SND_USB_CAIAQ is not set
 CONFIG_SND_SPARC=y
 # CONFIG_SND_SUN_AMD7930 is not set
@@ -1301,6 +1298,7 @@
 #
 # Special HID drivers
 #
+# CONFIG_HID_3M_PCT is not set
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
@@ -1317,14 +1315,19 @@
 CONFIG_HID_LOGITECH=y
 # CONFIG_LOGITECH_FF is not set
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
+# CONFIG_LOGIG940_FF is not set
 CONFIG_HID_MICROSOFT=y
+# CONFIG_HID_MOSART is not set
 CONFIG_HID_MONTEREY=y
 CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
 CONFIG_HID_PANTHERLORD=y
 # CONFIG_PANTHERLORD_FF is not set
 CONFIG_HID_PETALYNX=y
+# CONFIG_HID_QUANTA is not set
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
+# CONFIG_HID_STANTUM is not set
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_GREENASIA=y
 # CONFIG_GREENASIA_FF is not set
@@ -1807,6 +1810,7 @@
 CONFIG_CRYPTO_MANAGER2=y
 CONFIG_CRYPTO_GF128MUL=m
 CONFIG_CRYPTO_NULL=m
+# CONFIG_CRYPTO_PCRYPT is not set
 CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
@@ -1904,9 +1908,11 @@
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_DECOMPRESS=y
 CONFIG_DECOMPRESS_GZIP=y
 CONFIG_DECOMPRESS_BZIP2=y
 CONFIG_DECOMPRESS_LZMA=y
+CONFIG_DECOMPRESS_LZO=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
index 679c750..2889574 100644
--- a/arch/sparc/include/asm/io_32.h
+++ b/arch/sparc/include/asm/io_32.h
@@ -249,10 +249,14 @@
 
 #define ioread8(X)			readb(X)
 #define ioread16(X)			readw(X)
+#define ioread16be(X)			__raw_readw(X)
 #define ioread32(X)			readl(X)
+#define ioread32be(X)			__raw_readl(X)
 #define iowrite8(val,X)			writeb(val,X)
 #define iowrite16(val,X)		writew(val,X)
+#define iowrite16be(val,X)		__raw_writew(val,X)
 #define iowrite32(val,X)		writel(val,X)
+#define iowrite32be(val,X)		__raw_writel(val,X)
 
 static inline void ioread8_rep(void __iomem *port, void *buf, unsigned long count)
 {
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 4aee21d..9517d06 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -468,10 +468,14 @@
 
 #define ioread8(X)			readb(X)
 #define ioread16(X)			readw(X)
+#define ioread16be(X)			__raw_readw(X)
 #define ioread32(X)			readl(X)
+#define ioread32be(X)			__raw_readl(X)
 #define iowrite8(val,X)			writeb(val,X)
 #define iowrite16(val,X)		writew(val,X)
+#define iowrite16be(val,X)		__raw_writew(val,X)
 #define iowrite32(val,X)		writel(val,X)
+#define iowrite32be(val,X)		__raw_writel(val,X)
 
 /* Create a virtual mapping cookie for an IO port range */
 extern void __iomem *ioport_map(unsigned long port, unsigned int nr);
diff --git a/arch/sparc/include/asm/perfctr.h b/arch/sparc/include/asm/perfctr.h
index 8368730..8d8720a 100644
--- a/arch/sparc/include/asm/perfctr.h
+++ b/arch/sparc/include/asm/perfctr.h
@@ -10,8 +10,8 @@
  * from enumeration below.  The meaning of further arguments
  * are determined by the operation code.
  *
- * int sys_perfctr(int opcode, unsigned long arg0,
- *                 unsigned long arg1, unsigned long arg2)
+ * NOTE: This system call is no longer provided, use the perf_events
+ *       infrastructure.
  *
  * Pointers which are passed by the user are pointers to 64-bit
  * integers.
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index d47a98e..d24cfe1 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -143,15 +143,7 @@
 	 * and 2 stores in this critical code path.  -DaveM
 	 */
 #define switch_to(prev, next, last)					\
-do {	if (test_thread_flag(TIF_PERFCTR)) {				\
-		unsigned long __tmp;					\
-		read_pcr(__tmp);					\
-		current_thread_info()->pcr_reg = __tmp;			\
-		read_pic(__tmp);					\
-		current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\
-		current_thread_info()->kernel_cntd1 += ((__tmp) >> 32);	\
-	}								\
-	flush_tlb_pending();						\
+do {	flush_tlb_pending();						\
 	save_and_clear_fpu();						\
 	/* If you are tempted to conditionalize the following */	\
 	/* so that ASI is only written if it changes, think again. */	\
@@ -197,11 +189,6 @@
 	        "l1", "l2", "l3", "l4", "l5", "l6", "l7",		\
 	  "i0", "i1", "i2", "i3", "i4", "i5",				\
 	  "o0", "o1", "o2", "o3", "o4", "o5",       "o7");		\
-	/* If you fuck with this, update ret_from_syscall code too. */	\
-	if (test_thread_flag(TIF_PERFCTR)) {				\
-		write_pcr(current_thread_info()->pcr_reg);		\
-		reset_pic();						\
-	}								\
 } while(0)
 
 static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 39be9f2..9e2d944 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -58,11 +58,6 @@
 	unsigned long		gsr[7];
 	unsigned long		xfsr[7];
 
-	__u64			__user *user_cntd0;
-	__u64			__user *user_cntd1;
-	__u64			kernel_cntd0, kernel_cntd1;
-	__u64			pcr_reg;
-
 	struct restart_block	restart_block;
 
 	struct pt_regs		*kern_una_regs;
@@ -96,15 +91,10 @@
 #define TI_RWIN_SPTRS	0x000003c8
 #define TI_GSR		0x00000400
 #define TI_XFSR		0x00000438
-#define TI_USER_CNTD0	0x00000470
-#define TI_USER_CNTD1	0x00000478
-#define TI_KERN_CNTD0	0x00000480
-#define TI_KERN_CNTD1	0x00000488
-#define TI_PCR		0x00000490
-#define TI_RESTART_BLOCK 0x00000498
-#define TI_KUNA_REGS	0x000004c8
-#define TI_KUNA_INSN	0x000004d0
-#define TI_FPREGS	0x00000500
+#define TI_RESTART_BLOCK 0x00000470
+#define TI_KUNA_REGS	0x000004a0
+#define TI_KUNA_INSN	0x000004a8
+#define TI_FPREGS	0x000004c0
 
 /* We embed this in the uppermost byte of thread_info->flags */
 #define FAULT_CODE_WRITE	0x01	/* Write access, implies D-TLB	   */
@@ -199,7 +189,7 @@
  *
  * On trap return we need to test several values:
  *
- * user:	need_resched, notify_resume, sigpending, wsaved, perfctr
+ * user:	need_resched, notify_resume, sigpending, wsaved
  * kernel:	fpdepth
  *
  * So to check for work in the kernel case we simply load the fpdepth
@@ -220,7 +210,7 @@
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_PERFCTR		4	/* performance counters active */
+/* flag bit 4 is available */
 #define TIF_UNALIGNED		5	/* allowed to do unaligned accesses */
 /* flag bit 6 is available */
 #define TIF_32BIT		7	/* 32-bit binary */
@@ -241,7 +231,6 @@
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_PERFCTR		(1<<TIF_PERFCTR)
 #define _TIF_UNALIGNED		(1<<TIF_UNALIGNED)
 #define _TIF_32BIT		(1<<TIF_32BIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
@@ -252,7 +241,7 @@
 
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 _TIF_DO_NOTIFY_RESUME_MASK | \
-				 _TIF_NEED_RESCHED | _TIF_PERFCTR)
+				 _TIF_NEED_RESCHED)
 #define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
 
 /*
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 4f53a23..c011b93 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -48,7 +48,6 @@
 extern unsigned int dcache_parity_tl1_occurred;
 extern unsigned int icache_parity_tl1_occurred;
 
-extern asmlinkage void update_perfctrs(void);
 extern asmlinkage void sparc_breakpoint(struct pt_regs *regs);
 extern void timer_interrupt(int irq, struct pt_regs *regs);
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index cb70476..a5cf386 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -352,12 +352,6 @@
 		else
 			t->utraps[0]--;
 	}
-
-	if (test_and_clear_thread_flag(TIF_PERFCTR)) {
-		t->user_cntd0 = t->user_cntd1 = NULL;
-		t->pcr_reg = 0;
-		write_pcr(0);
-	}
 }
 
 void flush_thread(void)
@@ -371,13 +365,6 @@
 
 	set_thread_wsaved(0);
 
-	/* Turn off performance counters if on. */
-	if (test_and_clear_thread_flag(TIF_PERFCTR)) {
-		t->user_cntd0 = t->user_cntd1 = NULL;
-		t->pcr_reg = 0;
-		write_pcr(0);
-	}
-
 	/* Clear FPU register state. */
 	t->fpsaved[0] = 0;
 	
@@ -591,16 +578,6 @@
 		t->kregs->u_regs[UREG_FP] =
 		  ((unsigned long) child_sf) - STACK_BIAS;
 
-		/* Special case, if we are spawning a kernel thread from
-		 * a userspace task (usermode helper, NFS or similar), we
-		 * must disable performance counters in the child because
-		 * the address space and protection realm are changing.
-		 */
-		if (t->flags & _TIF_PERFCTR) {
-			t->user_cntd0 = t->user_cntd1 = NULL;
-			t->pcr_reg = 0;
-			t->flags &= ~_TIF_PERFCTR;
-		}
 		t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
 		t->kregs->u_regs[UREG_G6] = (unsigned long) t;
 		t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 1ddec40..83f1873 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -65,48 +65,6 @@
 		ba,pt			%xcc, __handle_user_windows_continue
 
 		 andn			%l1, %l4, %l1
-__handle_perfctrs:
-		call			update_perfctrs
-		 wrpr			%g0, RTRAP_PSTATE, %pstate
-		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
-		ldub			[%g6 + TI_WSAVED], %o2
-		brz,pt			%o2, 1f
-		 nop
-		/* Redo userwin+sched+sig checks */
-		call			fault_in_user_windows
-
-		 wrpr			%g0, RTRAP_PSTATE, %pstate
-		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
-		ldx			[%g6 + TI_FLAGS], %l0
-		andcc			%l0, _TIF_NEED_RESCHED, %g0
-		be,pt			%xcc, 1f
-
-		 nop
-		call			schedule
-		 wrpr			%g0, RTRAP_PSTATE, %pstate
-		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
-		ldx			[%g6 + TI_FLAGS], %l0
-1:		andcc			%l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
-
-		be,pt			%xcc, __handle_perfctrs_continue
-		 sethi			%hi(TSTATE_PEF), %o0
-		mov			%l5, %o1
-		add			%sp, PTREGS_OFF, %o0
-		mov			%l0, %o2
-		call			do_notify_resume
-
-		 wrpr			%g0, RTRAP_PSTATE, %pstate
-		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
-		/* Signal delivery can modify pt_regs tstate, so we must
-		 * reload it.
-		 */
-		ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
-		sethi			%hi(0xf << 20), %l4
-		and			%l1, %l4, %l4
-		andn			%l1, %l4, %l1
-		ba,pt			%xcc, __handle_perfctrs_continue
-
-		 sethi			%hi(TSTATE_PEF), %o0
 __handle_userfpu:
 		rd			%fprs, %l5
 		andcc			%l5, FPRS_FEF, %g0
@@ -191,9 +149,9 @@
 		 * take until the next local IRQ before the signal/resched
 		 * event would be handled.
 		 *
-		 * This also means that if we have to deal with performance
-		 * counters or user windows, we have to redo all of these
-		 * sched+signal checks with IRQs disabled.
+		 * This also means that if we have to deal with user
+		 * windows, we have to redo all of these sched+signal checks
+		 * with IRQs disabled.
 		 */
 to_user:	wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
 		wrpr			0, %pil
@@ -214,12 +172,8 @@
 		brnz,pn			%o2, __handle_user_windows
 		 nop
 __handle_user_windows_continue:
-		ldx			[%g6 + TI_FLAGS], %l5
-		andcc			%l5, _TIF_PERFCTR, %g0
 		sethi			%hi(TSTATE_PEF), %o0
-		bne,pn			%xcc, __handle_perfctrs
-__handle_perfctrs_continue:
-		 andcc			%l1, %o0, %g0
+		andcc			%l1, %o0, %g0
 
 		/* This fpdepth clear is necessary for non-syscall rtraps only */
 user_nowork:
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index e706113..46a76ba 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -51,7 +51,6 @@
 SIGN1(sys32_wait4, compat_sys_wait4, %o2)
 SIGN1(sys32_creat, sys_creat, %o1)
 SIGN1(sys32_mknod, sys_mknod, %o1)
-SIGN1(sys32_perfctr, sys_perfctr, %o0)
 SIGN1(sys32_umount, sys_umount, %o1)
 SIGN1(sys32_signal, sys_signal, %o0)
 SIGN1(sys32_access, sys_access, %o1)
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index d77f543..cb1bef6 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -27,7 +27,6 @@
 
 #include <asm/uaccess.h>
 #include <asm/utrap.h>
-#include <asm/perfctr.h>
 #include <asm/unistd.h>
 
 #include "entry.h"
@@ -766,109 +765,6 @@
 	return ret;
 }
 
-/* Invoked by rtrap code to update performance counters in
- * user space.
- */
-asmlinkage void update_perfctrs(void)
-{
-	unsigned long pic, tmp;
-
-	read_pic(pic);
-	tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
-	__put_user(tmp, current_thread_info()->user_cntd0);
-	tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
-	__put_user(tmp, current_thread_info()->user_cntd1);
-	reset_pic();
-}
-
-SYSCALL_DEFINE4(perfctr, int, opcode, unsigned long, arg0,
-		unsigned long, arg1, unsigned long, arg2)
-{
-	int err = 0;
-
-	switch(opcode) {
-	case PERFCTR_ON:
-		current_thread_info()->pcr_reg = arg2;
-		current_thread_info()->user_cntd0 = (u64 __user *) arg0;
-		current_thread_info()->user_cntd1 = (u64 __user *) arg1;
-		current_thread_info()->kernel_cntd0 =
-			current_thread_info()->kernel_cntd1 = 0;
-		write_pcr(arg2);
-		reset_pic();
-		set_thread_flag(TIF_PERFCTR);
-		break;
-
-	case PERFCTR_OFF:
-		err = -EINVAL;
-		if (test_thread_flag(TIF_PERFCTR)) {
-			current_thread_info()->user_cntd0 =
-				current_thread_info()->user_cntd1 = NULL;
-			current_thread_info()->pcr_reg = 0;
-			write_pcr(0);
-			clear_thread_flag(TIF_PERFCTR);
-			err = 0;
-		}
-		break;
-
-	case PERFCTR_READ: {
-		unsigned long pic, tmp;
-
-		if (!test_thread_flag(TIF_PERFCTR)) {
-			err = -EINVAL;
-			break;
-		}
-		read_pic(pic);
-		tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
-		err |= __put_user(tmp, current_thread_info()->user_cntd0);
-		tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
-		err |= __put_user(tmp, current_thread_info()->user_cntd1);
-		reset_pic();
-		break;
-	}
-
-	case PERFCTR_CLRPIC:
-		if (!test_thread_flag(TIF_PERFCTR)) {
-			err = -EINVAL;
-			break;
-		}
-		current_thread_info()->kernel_cntd0 =
-			current_thread_info()->kernel_cntd1 = 0;
-		reset_pic();
-		break;
-
-	case PERFCTR_SETPCR: {
-		u64 __user *user_pcr = (u64 __user *)arg0;
-
-		if (!test_thread_flag(TIF_PERFCTR)) {
-			err = -EINVAL;
-			break;
-		}
-		err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
-		write_pcr(current_thread_info()->pcr_reg);
-		current_thread_info()->kernel_cntd0 =
-			current_thread_info()->kernel_cntd1 = 0;
-		reset_pic();
-		break;
-	}
-
-	case PERFCTR_GETPCR: {
-		u64 __user *user_pcr = (u64 __user *)arg0;
-
-		if (!test_thread_flag(TIF_PERFCTR)) {
-			err = -EINVAL;
-			break;
-		}
-		err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
-		break;
-	}
-
-	default:
-		err = -EINVAL;
-		break;
-	};
-	return err;
-}
-
 /*
  * Do a system call from kernel instead of calling sys_execve so we
  * end up with proper pt_regs.
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index dc4a458..1d7e274 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -110,31 +110,12 @@
 
 	.globl	ret_from_syscall
 ret_from_syscall:
-	/* Clear current_thread_info()->new_child, and
-	 * check performance counter stuff too.
-	 */
+	/* Clear current_thread_info()->new_child. */
 	stb	%g0, [%g6 + TI_NEW_CHILD]
 	ldx	[%g6 + TI_FLAGS], %l0
 	call	schedule_tail
 	 mov	%g7, %o0
-	andcc	%l0, _TIF_PERFCTR, %g0
-	be,pt	%icc, 1f
-	 nop
-	ldx	[%g6 + TI_PCR], %o7
-	wr	%g0, %o7, %pcr
-
-	/* Blackbird errata workaround.  See commentary in
-	 * smp.c:smp_percpu_timer_interrupt() for more
-	 * information.
-	 */
-	ba,pt	%xcc, 99f
-	 nop
-
-	.align	64
-99:	wr	%g0, %g0, %pic
-	rd	%pic, %g0
-
-1:	ba,pt	%xcc, ret_sys_call
+	ba,pt	%xcc, ret_sys_call
 	 ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
 
 	.globl	sparc_exit
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index d2f999a..68312fe 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -36,8 +36,6 @@
 					struct sigaction __user *oact,
 					void __user *restorer,
 					size_t sigsetsize);
-extern asmlinkage long sys_perfctr(int opcode, unsigned long arg0,
-				   unsigned long arg1, unsigned long arg2);
 
 extern asmlinkage void sparc64_set_context(struct pt_regs *regs);
 extern asmlinkage void sparc64_get_context(struct pt_regs *regs);
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index e575b46..1761425 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -21,7 +21,7 @@
 /*0*/	.word sys_restart_syscall, sys32_exit, sys_fork, sys_read, sys_write
 /*5*/	.word sys32_open, sys_close, sys32_wait4, sys32_creat, sys_link
 /*10*/  .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys32_mknod
-/*15*/	.word sys_chmod, sys_lchown16, sys_brk, sys32_perfctr, sys32_lseek
+/*15*/	.word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, sys32_lseek
 /*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
 /*25*/	.word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause
 /*30*/	.word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice
@@ -96,7 +96,7 @@
 /*0*/	.word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
 /*5*/	.word sys_open, sys_close, sys_wait4, sys_creat, sys_link
 /*10*/  .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod
-/*15*/	.word sys_chmod, sys_lchown, sys_brk, sys_perfctr, sys_lseek
+/*15*/	.word sys_chmod, sys_lchown, sys_brk, sys_nis_syscall, sys_lseek
 /*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid
 /*25*/	.word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
 /*30*/	.word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 10f7bb9..bdc05a2 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2548,15 +2548,6 @@
 					       rwbuf_stkptrs) ||
 		     TI_GSR != offsetof(struct thread_info, gsr) ||
 		     TI_XFSR != offsetof(struct thread_info, xfsr) ||
-		     TI_USER_CNTD0 != offsetof(struct thread_info,
-					       user_cntd0) ||
-		     TI_USER_CNTD1 != offsetof(struct thread_info,
-					       user_cntd1) ||
-		     TI_KERN_CNTD0 != offsetof(struct thread_info,
-					       kernel_cntd0) ||
-		     TI_KERN_CNTD1 != offsetof(struct thread_info,
-					       kernel_cntd1) ||
-		     TI_PCR != offsetof(struct thread_info, pcr_reg) ||
 		     TI_PRE_COUNT != offsetof(struct thread_info,
 					      preempt_count) ||
 		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
index 4b7c937..2d8b70d 100644
--- a/arch/sparc/prom/p1275.c
+++ b/arch/sparc/prom/p1275.c
@@ -32,10 +32,9 @@
 extern void prom_cif_callback(void);
 
 /*
- * This provides SMP safety on the p1275buf. prom_callback() drops this lock
- * to allow recursuve acquisition.
+ * This provides SMP safety on the p1275buf.
  */
-DEFINE_SPINLOCK(prom_entry_lock);
+DEFINE_RAW_SPINLOCK(prom_entry_lock);
 
 long p1275_cmd(const char *service, long fmt, ...)
 {
@@ -47,7 +46,9 @@
 	
 	p = p1275buf.prom_buffer;
 
-	spin_lock_irqsave(&prom_entry_lock, flags);
+	raw_local_save_flags(flags);
+	raw_local_irq_restore(PIL_NMI);
+	raw_spin_lock(&prom_entry_lock);
 
 	p1275buf.prom_args[0] = (unsigned long)p;		/* service */
 	strcpy (p, service);
@@ -139,7 +140,8 @@
 	va_end(list);
 	x = p1275buf.prom_args [nargs + 3];
 
-	spin_unlock_irqrestore(&prom_entry_lock, flags);
+	raw_spin_unlock(&prom_entry_lock);
+	raw_local_irq_restore(flags);
 
 	return x;
 }
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 3b3c366..de317d0 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -140,7 +140,7 @@
 		goto out;
 	}
 
-	err = may_open(&nd.path, MAY_READ, FMODE_READ);
+	err = may_open(&nd.path, MAY_READ, O_RDONLY);
 	if (result) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		path_put(&nd.path);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 57ccdce..f15f37b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -31,6 +31,7 @@
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
 	select HAVE_KRETPROBES
+	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 9f828f8..493092e 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,6 +11,7 @@
 header-y += ucontext.h
 header-y += processor-flags.h
 header-y += hw_breakpoint.h
+header-y += hyperv.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index f1e253c..b09ec55 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -165,10 +165,12 @@
  * invalid instruction possible) or if the instructions are changed from a
  * consistent state to another consistent state atomically.
  * More care must be taken when modifying code in the SMP case because of
- * Intel's errata.
+ * Intel's errata. text_poke_smp() takes care that errata, but still
+ * doesn't support NMI/MCE handler code modifying.
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
 extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
new file mode 100644
index 0000000..e153a2b
--- /dev/null
+++ b/arch/x86/include/asm/hyperv.h
@@ -0,0 +1,186 @@
+#ifndef _ASM_X86_KVM_HYPERV_H
+#define _ASM_X86_KVM_HYPERV_H
+
+#include <linux/types.h>
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
+ */
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS	0x40000000
+#define HYPERV_CPUID_INTERFACE			0x40000001
+#define HYPERV_CPUID_VERSION			0x40000002
+#define HYPERV_CPUID_FEATURES			0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO		0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS		0x40000005
+
+/*
+ * Feature identification. EAX indicates which features are available
+ * to the partition based upon the current partition privileges.
+ */
+
+/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE		(1 << 0)
+/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
+#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
+/*
+ * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
+ * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
+ */
+#define HV_X64_MSR_SYNIC_AVAILABLE		(1 << 2)
+/*
+ * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
+ * HV_X64_MSR_STIMER3_COUNT) available
+ */
+#define HV_X64_MSR_SYNTIMER_AVAILABLE		(1 << 3)
+/*
+ * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * are available
+ */
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE	(1 << 4)
+/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
+#define HV_X64_MSR_HYPERCALL_AVAILABLE		(1 << 5)
+/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
+#define HV_X64_MSR_VP_INDEX_AVAILABLE		(1 << 6)
+/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
+#define HV_X64_MSR_RESET_AVAILABLE		(1 << 7)
+ /*
+  * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
+  * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
+  * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
+  */
+#define HV_X64_MSR_STAT_PAGES_AVAILABLE		(1 << 8)
+
+/*
+ * Feature identification: EBX indicates which flags were specified at
+ * partition creation. The format is the same as the partition creation
+ * flag structure defined in section Partition Creation Flags.
+ */
+#define HV_X64_CREATE_PARTITIONS		(1 << 0)
+#define HV_X64_ACCESS_PARTITION_ID		(1 << 1)
+#define HV_X64_ACCESS_MEMORY_POOL		(1 << 2)
+#define HV_X64_ADJUST_MESSAGE_BUFFERS		(1 << 3)
+#define HV_X64_POST_MESSAGES			(1 << 4)
+#define HV_X64_SIGNAL_EVENTS			(1 << 5)
+#define HV_X64_CREATE_PORT			(1 << 6)
+#define HV_X64_CONNECT_PORT			(1 << 7)
+#define HV_X64_ACCESS_STATS			(1 << 8)
+#define HV_X64_DEBUGGING			(1 << 11)
+#define HV_X64_CPU_POWER_MANAGEMENT		(1 << 12)
+#define HV_X64_CONFIGURE_PROFILER		(1 << 13)
+
+/*
+ * Feature identification. EDX indicates which miscellaneous features
+ * are available to the partition.
+ */
+/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
+#define HV_X64_MWAIT_AVAILABLE				(1 << 0)
+/* Guest debugging support is available */
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE		(1 << 1)
+/* Performance Monitor support is available*/
+#define HV_X64_PERF_MONITOR_AVAILABLE			(1 << 2)
+/* Support for physical CPU dynamic partitioning events is available*/
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE	(1 << 3)
+/*
+ * Support for passing hypercall input parameter block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		(1 << 4)
+/* Support for a virtual guest idle state is available */
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE		(1 << 5)
+
+/*
+ * Implementation recommendations. Indicates which behaviors the hypervisor
+ * recommends the OS implement for optimal performance.
+ */
+ /*
+  * Recommend using hypercall for address space switches rather
+  * than MOV to CR3 instruction
+  */
+#define HV_X64_MWAIT_RECOMMENDED		(1 << 0)
+/* Recommend using hypercall for local TLB flushes rather
+ * than INVLPG or MOV to CR3 instructions */
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED	(1 << 1)
+/*
+ * Recommend using hypercall for remote TLB flushes rather
+ * than inter-processor interrupts
+ */
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED	(1 << 2)
+/*
+ * Recommend using MSRs for accessing APIC registers
+ * EOI, ICR and TPR rather than their memory-mapped counterparts
+ */
+#define HV_X64_APIC_ACCESS_RECOMMENDED		(1 << 3)
+/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
+#define HV_X64_SYSTEM_RESET_RECOMMENDED		(1 << 4)
+/*
+ * Recommend using relaxed timing for this partition. If used,
+ * the VM should disable any watchdog timeouts that rely on the
+ * timely delivery of external interrupts
+ */
+#define HV_X64_RELAXED_TIMING_RECOMMENDED	(1 << 5)
+
+/* MSR used to identify the guest OS. */
+#define HV_X64_MSR_GUEST_OS_ID			0x40000000
+
+/* MSR used to setup pages used to communicate with the hypervisor. */
+#define HV_X64_MSR_HYPERCALL			0x40000001
+
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX			0x40000002
+
+/* Define the virtual APIC registers */
+#define HV_X64_MSR_EOI				0x40000070
+#define HV_X64_MSR_ICR				0x40000071
+#define HV_X64_MSR_TPR				0x40000072
+#define HV_X64_MSR_APIC_ASSIST_PAGE		0x40000073
+
+/* Define synthetic interrupt controller model specific registers. */
+#define HV_X64_MSR_SCONTROL			0x40000080
+#define HV_X64_MSR_SVERSION			0x40000081
+#define HV_X64_MSR_SIEFP			0x40000082
+#define HV_X64_MSR_SIMP				0x40000083
+#define HV_X64_MSR_EOM				0x40000084
+#define HV_X64_MSR_SINT0			0x40000090
+#define HV_X64_MSR_SINT1			0x40000091
+#define HV_X64_MSR_SINT2			0x40000092
+#define HV_X64_MSR_SINT3			0x40000093
+#define HV_X64_MSR_SINT4			0x40000094
+#define HV_X64_MSR_SINT5			0x40000095
+#define HV_X64_MSR_SINT6			0x40000096
+#define HV_X64_MSR_SINT7			0x40000097
+#define HV_X64_MSR_SINT8			0x40000098
+#define HV_X64_MSR_SINT9			0x40000099
+#define HV_X64_MSR_SINT10			0x4000009A
+#define HV_X64_MSR_SINT11			0x4000009B
+#define HV_X64_MSR_SINT12			0x4000009C
+#define HV_X64_MSR_SINT13			0x4000009D
+#define HV_X64_MSR_SINT14			0x4000009E
+#define HV_X64_MSR_SINT15			0x4000009F
+
+
+#define HV_X64_MSR_HYPERCALL_ENABLE		0x00000001
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Declare the various hypercall operations. */
+#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT		0x0008
+
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE		0x00000001
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+#define HV_PROCESSOR_POWER_STATE_C0		0
+#define HV_PROCESSOR_POWER_STATE_C1		1
+#define HV_PROCESSOR_POWER_STATE_C2		2
+#define HV_PROCESSOR_POWER_STATE_C3		3
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS			0
+#define HV_STATUS_INVALID_HYPERCALL_CODE	2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT	3
+#define HV_STATUS_INVALID_ALIGNMENT		4
+
+#endif
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4fe681d..4ffa345 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -32,7 +32,10 @@
 
 typedef u8 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xcc
-#define RELATIVEJUMP_INSTRUCTION 0xe9
+#define RELATIVEJUMP_OPCODE 0xe9
+#define RELATIVEJUMP_SIZE 5
+#define RELATIVECALL_OPCODE 0xe8
+#define RELATIVE_ADDR_SIZE 4
 #define MAX_INSN_SIZE 16
 #define MAX_STACK_SIZE 64
 #define MIN_STACK_SIZE(ADDR)					       \
@@ -44,6 +47,17 @@
 
 #define flush_insn_slot(p)	do { } while (0)
 
+/* optinsn template addresses */
+extern kprobe_opcode_t optprobe_template_entry;
+extern kprobe_opcode_t optprobe_template_val;
+extern kprobe_opcode_t optprobe_template_call;
+extern kprobe_opcode_t optprobe_template_end;
+#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
+#define MAX_OPTINSN_SIZE 				\
+	(((unsigned long)&optprobe_template_end -	\
+	  (unsigned long)&optprobe_template_entry) +	\
+	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
+
 extern const int kretprobe_blacklist_size;
 
 void arch_remove_kprobe(struct kprobe *p);
@@ -64,6 +78,21 @@
 	int boostable;
 };
 
+struct arch_optimized_insn {
+	/* copy of the original instructions */
+	kprobe_opcode_t copied_insn[RELATIVE_ADDR_SIZE];
+	/* detour code buffer */
+	kprobe_opcode_t *insn;
+	/* the size of instructions copied to detour code buffer */
+	size_t size;
+};
+
+/* Return true (!0) if optinsn is prepared for optimization. */
+static inline int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+	return optinsn->size;
+}
+
 struct prev_kprobe {
 	struct kprobe *kp;
 	unsigned long status;
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c18e12..7a6f54f 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -54,13 +54,23 @@
 struct x86_emulate_ops {
 	/*
 	 * read_std: Read bytes of standard (non-emulated/special) memory.
-	 *           Used for instruction fetch, stack operations, and others.
+	 *           Used for descriptor reading.
 	 *  @addr:  [IN ] Linear address from which to read.
 	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
 	 *  @bytes: [IN ] Number of bytes to read from memory.
 	 */
 	int (*read_std)(unsigned long addr, void *val,
-			unsigned int bytes, struct kvm_vcpu *vcpu);
+			unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+
+	/*
+	 * fetch: Read bytes of standard (non-emulated/special) memory.
+	 *        Used for instruction fetch.
+	 *  @addr:  [IN ] Linear address from which to read.
+	 *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
+	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 */
+	int (*fetch)(unsigned long addr, void *val,
+			unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
 
 	/*
 	 * read_emulated: Read bytes from emulated/special memory area.
@@ -74,7 +84,7 @@
 			     struct kvm_vcpu *vcpu);
 
 	/*
-	 * write_emulated: Read bytes from emulated/special memory area.
+	 * write_emulated: Write bytes to emulated/special memory area.
 	 *  @addr:  [IN ] Linear address to which to write.
 	 *  @val:   [IN ] Value to write to memory (low-order bytes used as
 	 *                required).
@@ -168,6 +178,7 @@
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
+#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
 #define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
 #define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
 #define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f865e8..06d9e79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,7 +25,7 @@
 #include <asm/mtrr.h>
 #include <asm/msr-index.h>
 
-#define KVM_MAX_VCPUS 16
+#define KVM_MAX_VCPUS 64
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -38,19 +38,6 @@
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |	\
 				  0xFFFFFF0000000000ULL)
 
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
-	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK						\
-	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
-	(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
-#define KVM_VM_CR0_ALWAYS_ON						\
-	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_GUEST_CR4_MASK						\
-	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
-#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
-#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
-
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
@@ -256,7 +243,8 @@
 	void (*new_cr3)(struct kvm_vcpu *vcpu);
 	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
 	void (*free)(struct kvm_vcpu *vcpu);
-	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+			    u32 *error);
 	void (*prefetch_page)(struct kvm_vcpu *vcpu,
 			      struct kvm_mmu_page *page);
 	int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -282,13 +270,15 @@
 	u32 regs_dirty;
 
 	unsigned long cr0;
+	unsigned long cr0_guest_owned_bits;
 	unsigned long cr2;
 	unsigned long cr3;
 	unsigned long cr4;
+	unsigned long cr4_guest_owned_bits;
 	unsigned long cr8;
 	u32 hflags;
 	u64 pdptrs[4]; /* pae */
-	u64 shadow_efer;
+	u64 efer;
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
 	int32_t apic_arb_prio;
@@ -374,17 +364,27 @@
 	/* used for guest single stepping over the given code position */
 	u16 singlestep_cs;
 	unsigned long singlestep_rip;
+	/* fields used by HYPER-V emulation */
+	u64 hv_vapic;
 };
 
 struct kvm_mem_alias {
 	gfn_t base_gfn;
 	unsigned long npages;
 	gfn_t target_gfn;
+#define KVM_ALIAS_INVALID     1UL
+	unsigned long flags;
 };
 
-struct kvm_arch{
-	int naliases;
+#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+
+struct kvm_mem_aliases {
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
+	int naliases;
+};
+
+struct kvm_arch {
+	struct kvm_mem_aliases *aliases;
 
 	unsigned int n_free_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -416,6 +416,10 @@
 	s64 kvmclock_offset;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
+
+	/* fields used by HYPER-V emulation */
+	u64 hv_guest_os_id;
+	u64 hv_hypercall;
 };
 
 struct kvm_vm_stat {
@@ -471,6 +475,7 @@
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
 	bool (*cpu_has_accelerated_tpr)(void);
+	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
@@ -492,6 +497,7 @@
 	void (*set_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+	void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
 	void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
 	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -501,12 +507,13 @@
 	void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
 	void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
 	void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-	unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
-	void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
-		       int *exception);
+	int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
+	int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	void (*fpu_activate)(struct kvm_vcpu *vcpu);
+	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
@@ -531,7 +538,8 @@
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
-	bool (*gb_page_enable)(void);
+	int (*get_lpage_level)(void);
+	bool (*rdtscp_supported)(void);
 
 	const struct trace_print_flags *exit_reasons_str;
 };
@@ -606,8 +614,7 @@
 		    unsigned long value);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
-				int type_bits, int seg);
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
 
@@ -653,6 +660,10 @@
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
@@ -666,6 +677,7 @@
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
+bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c584076..ffae142 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_KVM_PARA_H
 
 #include <linux/types.h>
+#include <asm/hyperv.h>
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1fecb7e..38638cd 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -313,7 +313,7 @@
 
 #define SVM_EXIT_ERR		-1
 
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
 #define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2b49454..fb9a080 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -53,6 +53,7 @@
  */
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_RDTSCP			0x00000008
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
@@ -251,6 +252,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY	 41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
@@ -362,6 +364,7 @@
 #define VMX_EPTP_UC_BIT				(1ull << 8)
 #define VMX_EPTP_WB_BIT				(1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
+#define VMX_EPT_1GB_PAGE_BIT			(1ull << 17)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
@@ -374,7 +377,7 @@
 #define VMX_EPT_READABLE_MASK			0x1ull
 #define VMX_EPT_WRITABLE_MASK			0x2ull
 #define VMX_EPT_EXECUTABLE_MASK			0x4ull
-#define VMX_EPT_IGMT_BIT    			(1ull << 6)
+#define VMX_EPT_IPAT_BIT    			(1ull << 6)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e6ea034..3a4bf35 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -7,6 +7,7 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
+#include <linux/stop_machine.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
@@ -572,3 +573,62 @@
 	local_irq_restore(flags);
 	return addr;
 }
+
+/*
+ * Cross-modifying kernel text with stop_machine().
+ * This code originally comes from immediate value.
+ */
+static atomic_t stop_machine_first;
+static int wrote_text;
+
+struct text_poke_params {
+	void *addr;
+	const void *opcode;
+	size_t len;
+};
+
+static int __kprobes stop_machine_text_poke(void *data)
+{
+	struct text_poke_params *tpp = data;
+
+	if (atomic_dec_and_test(&stop_machine_first)) {
+		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		smp_wmb();	/* Make sure other cpus see that this has run */
+		wrote_text = 1;
+	} else {
+		while (!wrote_text)
+			cpu_relax();
+		smp_mb();	/* Load wrote_text before following execution */
+	}
+
+	flush_icache_range((unsigned long)tpp->addr,
+			   (unsigned long)tpp->addr + tpp->len);
+	return 0;
+}
+
+/**
+ * text_poke_smp - Update instructions on a live kernel on SMP
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. This allows
+ * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
+ * should be allowed, since stop_machine() does _not_ protect code against
+ * NMI and MCE.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
+{
+	struct text_poke_params tpp;
+
+	tpp.addr = addr;
+	tpp.opcode = opcode;
+	tpp.len = len;
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	return addr;
+}
+
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index fe4622e..79556bd 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -145,6 +145,7 @@
 
 /**
  * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
+ * @info: pointer to mtrr configuration data
  *
  * Returns nothing.
  */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5de9f4a..b43bbae 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -49,6 +49,7 @@
 #include <linux/module.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
+#include <linux/ftrace.h>
 
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
@@ -106,16 +107,22 @@
 };
 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
 
-/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes set_jmp_op(void *from, void *to)
+static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
 {
-	struct __arch_jmp_op {
-		char op;
+	struct __arch_relative_insn {
+		u8 op;
 		s32 raddr;
-	} __attribute__((packed)) * jop;
-	jop = (struct __arch_jmp_op *)from;
-	jop->raddr = (s32)((long)(to) - ((long)(from) + 5));
-	jop->op = RELATIVEJUMP_INSTRUCTION;
+	} __attribute__((packed)) *insn;
+
+	insn = (struct __arch_relative_insn *)from;
+	insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
+	insn->op = op;
+}
+
+/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
+static void __kprobes synthesize_reljump(void *from, void *to)
+{
+	__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
 }
 
 /*
@@ -202,7 +209,7 @@
 	/*
 	 *  Basically, kp->ainsn.insn has an original instruction.
 	 *  However, RIP-relative instruction can not do single-stepping
-	 *  at different place, fix_riprel() tweaks the displacement of
+	 *  at different place, __copy_instruction() tweaks the displacement of
 	 *  that instruction. In that case, we can't recover the instruction
 	 *  from the kp->ainsn.insn.
 	 *
@@ -284,21 +291,37 @@
 }
 
 /*
- * Adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
  * If it does, Return the address of the 32-bit displacement word.
  * If not, return null.
  * Only applicable to 64-bit x86.
  */
-static void __kprobes fix_riprel(struct kprobe *p)
+static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
 {
-#ifdef CONFIG_X86_64
 	struct insn insn;
-	kernel_insn_init(&insn, p->ainsn.insn);
+	int ret;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
 
+	kernel_insn_init(&insn, src);
+	if (recover) {
+		insn_get_opcode(&insn);
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf,
+							 (unsigned long)src);
+			if (ret)
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+	}
+	insn_get_length(&insn);
+	memcpy(dest, insn.kaddr, insn.length);
+
+#ifdef CONFIG_X86_64
 	if (insn_rip_relative(&insn)) {
 		s64 newdisp;
 		u8 *disp;
+		kernel_insn_init(&insn, dest);
 		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
@@ -312,20 +335,23 @@
 		 * extension of the original signed 32-bit displacement would
 		 * have given.
 		 */
-		newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
-			  (u8 *) p->ainsn.insn;
+		newdisp = (u8 *) src + (s64) insn.displacement.value -
+			  (u8 *) dest;
 		BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check.  */
-		disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+		disp = (u8 *) dest + insn_offset_displacement(&insn);
 		*(s32 *) disp = (s32) newdisp;
 	}
 #endif
+	return insn.length;
 }
 
 static void __kprobes arch_copy_kprobe(struct kprobe *p)
 {
-	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
-
-	fix_riprel(p);
+	/*
+	 * Copy an instruction without recovering int3, because it will be
+	 * put by another subsystem.
+	 */
+	__copy_instruction(p->ainsn.insn, p->addr, 0);
 
 	if (can_boost(p->addr))
 		p->ainsn.boostable = 0;
@@ -406,18 +432,6 @@
 		update_debugctlmsr(current->thread.debugctlmsr);
 }
 
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
-{
-	clear_btf();
-	regs->flags |= X86_EFLAGS_TF;
-	regs->flags &= ~X86_EFLAGS_IF;
-	/* single step inline if the instruction is an int3 */
-	if (p->opcode == BREAKPOINT_INSTRUCTION)
-		regs->ip = (unsigned long)p->addr;
-	else
-		regs->ip = (unsigned long)p->ainsn.insn;
-}
-
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
 {
@@ -429,20 +443,50 @@
 	*sara = (unsigned long) &kretprobe_trampoline;
 }
 
+#ifdef CONFIG_OPTPROBES
+static int  __kprobes setup_detour_execution(struct kprobe *p,
+					     struct pt_regs *regs,
+					     int reenter);
+#else
+#define setup_detour_execution(p, regs, reenter) (0)
+#endif
+
 static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
-				       struct kprobe_ctlblk *kcb)
+				       struct kprobe_ctlblk *kcb, int reenter)
 {
+	if (setup_detour_execution(p, regs, reenter))
+		return;
+
 #if !defined(CONFIG_PREEMPT)
 	if (p->ainsn.boostable == 1 && !p->post_handler) {
 		/* Boost up -- we can execute copied instructions directly */
-		reset_current_kprobe();
+		if (!reenter)
+			reset_current_kprobe();
+		/*
+		 * Reentering boosted probe doesn't reset current_kprobe,
+		 * nor set current_kprobe, because it doesn't use single
+		 * stepping.
+		 */
 		regs->ip = (unsigned long)p->ainsn.insn;
 		preempt_enable_no_resched();
 		return;
 	}
 #endif
-	prepare_singlestep(p, regs);
-	kcb->kprobe_status = KPROBE_HIT_SS;
+	if (reenter) {
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p, regs, kcb);
+		kcb->kprobe_status = KPROBE_REENTER;
+	} else
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	/* Prepare real single stepping */
+	clear_btf();
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
+	/* single step inline if the instruction is an int3 */
+	if (p->opcode == BREAKPOINT_INSTRUCTION)
+		regs->ip = (unsigned long)p->addr;
+	else
+		regs->ip = (unsigned long)p->ainsn.insn;
 }
 
 /*
@@ -456,11 +500,8 @@
 	switch (kcb->kprobe_status) {
 	case KPROBE_HIT_SSDONE:
 	case KPROBE_HIT_ACTIVE:
-		save_previous_kprobe(kcb);
-		set_current_kprobe(p, regs, kcb);
 		kprobes_inc_nmissed_count(p);
-		prepare_singlestep(p, regs);
-		kcb->kprobe_status = KPROBE_REENTER;
+		setup_singlestep(p, regs, kcb, 1);
 		break;
 	case KPROBE_HIT_SS:
 		/* A probe has been hit in the codepath leading up to, or just
@@ -535,13 +576,13 @@
 			 * more here.
 			 */
 			if (!p->pre_handler || !p->pre_handler(p, regs))
-				setup_singlestep(p, regs, kcb);
+				setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} else if (kprobe_running()) {
 		p = __get_cpu_var(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
-			setup_singlestep(p, regs, kcb);
+			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} /* else: not a kprobe fault; let the kernel handle it */
@@ -550,6 +591,69 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING		\
+	/* Skip cs, ip, orig_ax. */	\
+	"	subq $24, %rsp\n"	\
+	"	pushq %rdi\n"		\
+	"	pushq %rsi\n"		\
+	"	pushq %rdx\n"		\
+	"	pushq %rcx\n"		\
+	"	pushq %rax\n"		\
+	"	pushq %r8\n"		\
+	"	pushq %r9\n"		\
+	"	pushq %r10\n"		\
+	"	pushq %r11\n"		\
+	"	pushq %rbx\n"		\
+	"	pushq %rbp\n"		\
+	"	pushq %r12\n"		\
+	"	pushq %r13\n"		\
+	"	pushq %r14\n"		\
+	"	pushq %r15\n"
+#define RESTORE_REGS_STRING		\
+	"	popq %r15\n"		\
+	"	popq %r14\n"		\
+	"	popq %r13\n"		\
+	"	popq %r12\n"		\
+	"	popq %rbp\n"		\
+	"	popq %rbx\n"		\
+	"	popq %r11\n"		\
+	"	popq %r10\n"		\
+	"	popq %r9\n"		\
+	"	popq %r8\n"		\
+	"	popq %rax\n"		\
+	"	popq %rcx\n"		\
+	"	popq %rdx\n"		\
+	"	popq %rsi\n"		\
+	"	popq %rdi\n"		\
+	/* Skip orig_ax, ip, cs */	\
+	"	addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING		\
+	/* Skip cs, ip, orig_ax and gs. */	\
+	"	subl $16, %esp\n"	\
+	"	pushl %fs\n"		\
+	"	pushl %ds\n"		\
+	"	pushl %es\n"		\
+	"	pushl %eax\n"		\
+	"	pushl %ebp\n"		\
+	"	pushl %edi\n"		\
+	"	pushl %esi\n"		\
+	"	pushl %edx\n"		\
+	"	pushl %ecx\n"		\
+	"	pushl %ebx\n"
+#define RESTORE_REGS_STRING		\
+	"	popl %ebx\n"		\
+	"	popl %ecx\n"		\
+	"	popl %edx\n"		\
+	"	popl %esi\n"		\
+	"	popl %edi\n"		\
+	"	popl %ebp\n"		\
+	"	popl %eax\n"		\
+	/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+	"	addl $24, %esp\n"
+#endif
+
 /*
  * When a retprobed function returns, this code saves registers and
  * calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -563,65 +667,16 @@
 			/* We don't bother saving the ss register */
 			"	pushq %rsp\n"
 			"	pushfq\n"
-			/*
-			 * Skip cs, ip, orig_ax.
-			 * trampoline_handler() will plug in these values
-			 */
-			"	subq $24, %rsp\n"
-			"	pushq %rdi\n"
-			"	pushq %rsi\n"
-			"	pushq %rdx\n"
-			"	pushq %rcx\n"
-			"	pushq %rax\n"
-			"	pushq %r8\n"
-			"	pushq %r9\n"
-			"	pushq %r10\n"
-			"	pushq %r11\n"
-			"	pushq %rbx\n"
-			"	pushq %rbp\n"
-			"	pushq %r12\n"
-			"	pushq %r13\n"
-			"	pushq %r14\n"
-			"	pushq %r15\n"
+			SAVE_REGS_STRING
 			"	movq %rsp, %rdi\n"
 			"	call trampoline_handler\n"
 			/* Replace saved sp with true return address. */
 			"	movq %rax, 152(%rsp)\n"
-			"	popq %r15\n"
-			"	popq %r14\n"
-			"	popq %r13\n"
-			"	popq %r12\n"
-			"	popq %rbp\n"
-			"	popq %rbx\n"
-			"	popq %r11\n"
-			"	popq %r10\n"
-			"	popq %r9\n"
-			"	popq %r8\n"
-			"	popq %rax\n"
-			"	popq %rcx\n"
-			"	popq %rdx\n"
-			"	popq %rsi\n"
-			"	popq %rdi\n"
-			/* Skip orig_ax, ip, cs */
-			"	addq $24, %rsp\n"
+			RESTORE_REGS_STRING
 			"	popfq\n"
 #else
 			"	pushf\n"
-			/*
-			 * Skip cs, ip, orig_ax and gs.
-			 * trampoline_handler() will plug in these values
-			 */
-			"	subl $16, %esp\n"
-			"	pushl %fs\n"
-			"	pushl %es\n"
-			"	pushl %ds\n"
-			"	pushl %eax\n"
-			"	pushl %ebp\n"
-			"	pushl %edi\n"
-			"	pushl %esi\n"
-			"	pushl %edx\n"
-			"	pushl %ecx\n"
-			"	pushl %ebx\n"
+			SAVE_REGS_STRING
 			"	movl %esp, %eax\n"
 			"	call trampoline_handler\n"
 			/* Move flags to cs */
@@ -629,15 +684,7 @@
 			"	movl %edx, 52(%esp)\n"
 			/* Replace saved flags with true return address. */
 			"	movl %eax, 56(%esp)\n"
-			"	popl %ebx\n"
-			"	popl %ecx\n"
-			"	popl %edx\n"
-			"	popl %esi\n"
-			"	popl %edi\n"
-			"	popl %ebp\n"
-			"	popl %eax\n"
-			/* Skip ds, es, fs, gs, orig_ax and ip */
-			"	addl $24, %esp\n"
+			RESTORE_REGS_STRING
 			"	popf\n"
 #endif
 			"	ret\n");
@@ -805,8 +852,8 @@
 			 * These instructions can be executed directly if it
 			 * jumps back to correct address.
 			 */
-			set_jmp_op((void *)regs->ip,
-				   (void *)orig_ip + (regs->ip - copy_ip));
+			synthesize_reljump((void *)regs->ip,
+				(void *)orig_ip + (regs->ip - copy_ip));
 			p->ainsn.boostable = 1;
 		} else {
 			p->ainsn.boostable = -1;
@@ -1033,6 +1080,358 @@
 	return 0;
 }
 
+
+#ifdef CONFIG_OPTPROBES
+
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+static void __kprobes synthesize_relcall(void *from, void *to)
+{
+	__synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
+					  unsigned long val)
+{
+#ifdef CONFIG_X86_64
+	*addr++ = 0x48;
+	*addr++ = 0xbf;
+#else
+	*addr++ = 0xb8;
+#endif
+	*(unsigned long *)addr = val;
+}
+
+void __kprobes kprobes_optinsn_template_holder(void)
+{
+	asm volatile (
+			".global optprobe_template_entry\n"
+			"optprobe_template_entry: \n"
+#ifdef CONFIG_X86_64
+			/* We don't bother saving the ss register */
+			"	pushq %rsp\n"
+			"	pushfq\n"
+			SAVE_REGS_STRING
+			"	movq %rsp, %rsi\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val: \n"
+			ASM_NOP5
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call: \n"
+			ASM_NOP5
+			/* Move flags to rsp */
+			"	movq 144(%rsp), %rdx\n"
+			"	movq %rdx, 152(%rsp)\n"
+			RESTORE_REGS_STRING
+			/* Skip flags entry */
+			"	addq $8, %rsp\n"
+			"	popfq\n"
+#else /* CONFIG_X86_32 */
+			"	pushf\n"
+			SAVE_REGS_STRING
+			"	movl %esp, %edx\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val: \n"
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call: \n"
+			ASM_NOP5
+			RESTORE_REGS_STRING
+			"	addl $4, %esp\n"	/* skip cs */
+			"	popf\n"
+#endif
+			".global optprobe_template_end\n"
+			"optprobe_template_end: \n");
+}
+
+#define TMPL_MOVE_IDX \
+	((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+	((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+	((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op,
+					 struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	preempt_disable();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		/* Save skipped registers */
+#ifdef CONFIG_X86_64
+		regs->cs = __KERNEL_CS;
+#else
+		regs->cs = __KERNEL_CS | get_kernel_rpl();
+		regs->gs = 0;
+#endif
+		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->orig_ax = ~0UL;
+
+		__get_cpu_var(current_kprobe) = &op->kp;
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__get_cpu_var(current_kprobe) = NULL;
+	}
+	preempt_enable_no_resched();
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+	int len = 0, ret;
+
+	while (len < RELATIVEJUMP_SIZE) {
+		ret = __copy_instruction(dest + len, src + len, 1);
+		if (!ret || !can_boost(dest + len))
+			return -EINVAL;
+		len += ret;
+	}
+	/* Check whether the address range is reserved */
+	if (ftrace_text_reserved(src, src + len - 1) ||
+	    alternatives_text_reserved(src, src + len - 1))
+		return -EBUSY;
+
+	return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+	return ((insn->opcode.bytes[0] == 0xff &&
+		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+	unsigned long target = 0;
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xe0:	/* loopne */
+	case 0xe1:	/* loope */
+	case 0xe2:	/* loop */
+	case 0xe3:	/* jcxz */
+	case 0xe9:	/* near relative jump */
+	case 0xeb:	/* short relative jump */
+		break;
+	case 0x0f:
+		if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+			break;
+		return 0;
+	default:
+		if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+			break;
+		return 0;
+	}
+	target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+	return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+	int ret;
+	unsigned long addr, size = 0, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	/* Dummy buffers for lookup_symbol_attrs */
+	static char __dummy_buf[KSYM_NAME_LEN];
+
+	/* Lookup symbol including addr */
+	if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf))
+		return 0;
+
+	/* Check there is enough space for a relative jump. */
+	if (size - offset < RELATIVEJUMP_SIZE)
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr - offset + size) { /* Decode until function end */
+		if (search_exception_tables(addr))
+			/*
+			 * Since some fixup code will jumps into this function,
+			 * we can't optimize kprobe in this function.
+			 */
+			return 0;
+		kernel_insn_init(&insn, (void *)addr);
+		insn_get_opcode(&insn);
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			ret = recover_probed_instruction(buf, addr);
+			if (ret)
+				return 0;
+			kernel_insn_init(&insn, buf);
+		}
+		insn_get_length(&insn);
+		/* Recover address */
+		insn.kaddr = (void *)addr;
+		insn.next_byte = (void *)(addr + insn.length);
+		/* Check any instructions don't jump into target */
+		if (insn_is_indirect_jump(&insn) ||
+		    insn_jump_into_range(&insn, paddr + INT3_SIZE,
+					 RELATIVE_ADDR_SIZE))
+			return 0;
+		addr += insn.length;
+	}
+
+	return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+	int i;
+	struct kprobe *p;
+
+	for (i = 1; i < op->optinsn.size; i++) {
+		p = get_kprobe(op->kp.addr + i);
+		if (p && !kprobe_disabled(p))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
+					   unsigned long addr)
+{
+	return ((unsigned long)op->kp.addr <= addr &&
+		(unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+	if (op->optinsn.insn) {
+		free_optinsn_slot(op->optinsn.insn, dirty);
+		op->optinsn.insn = NULL;
+		op->optinsn.size = 0;
+	}
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+	__arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+	u8 *buf;
+	int ret;
+	long rel;
+
+	if (!can_optimize((unsigned long)op->kp.addr))
+		return -EILSEQ;
+
+	op->optinsn.insn = get_optinsn_slot();
+	if (!op->optinsn.insn)
+		return -ENOMEM;
+
+	/*
+	 * Verify if the address gap is in 2GB range, because this uses
+	 * a relative jump.
+	 */
+	rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+	if (abs(rel) > 0x7fffffff)
+		return -ERANGE;
+
+	buf = (u8 *)op->optinsn.insn;
+
+	/* Copy instructions into the out-of-line buffer */
+	ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+	if (ret < 0) {
+		__arch_remove_optimized_kprobe(op, 0);
+		return ret;
+	}
+	op->optinsn.size = ret;
+
+	/* Copy arch-dep-instance from template */
+	memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+	/* Set probe information */
+	synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+	/* Set probe function call */
+	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+	/* Set returning jmp instruction at the tail of out-of-line buffer */
+	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+			   (u8 *)op->kp.addr + op->optinsn.size);
+
+	flush_icache_range((unsigned long) buf,
+			   (unsigned long) buf + TMPL_END_IDX +
+			   op->optinsn.size + RELATIVEJUMP_SIZE);
+	return 0;
+}
+
+/* Replace a breakpoint (int3) with a relative jump.  */
+int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+{
+	unsigned char jmp_code[RELATIVEJUMP_SIZE];
+	s32 rel = (s32)((long)op->optinsn.insn -
+			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+	/* Backup instructions which will be replaced by jump address */
+	memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+	       RELATIVE_ADDR_SIZE);
+
+	jmp_code[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&jmp_code[1]) = rel;
+
+	/*
+	 * text_poke_smp doesn't support NMI/MCE code modifying.
+	 * However, since kprobes itself also doesn't support NMI/MCE
+	 * code probing, it's not a problem.
+	 */
+	text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
+	return 0;
+}
+
+/* Replace a relative jump with a breakpoint (int3).  */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+	u8 buf[RELATIVEJUMP_SIZE];
+
+	/* Set int3 to first byte for kprobes */
+	buf[0] = BREAKPOINT_INSTRUCTION;
+	memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+	text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+static int  __kprobes setup_detour_execution(struct kprobe *p,
+					     struct pt_regs *regs,
+					     int reenter)
+{
+	struct optimized_kprobe *op;
+
+	if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+		/* This kprobe is really able to run optimized path. */
+		op = container_of(p, struct optimized_kprobe, kp);
+		/* Detour through copied instructions */
+		regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+		if (!reenter)
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+	return 0;
+}
+#endif
+
 int __init arch_init_kprobes(void)
 {
 	return 0;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9055e58..1c0c6ab 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,8 @@
 	register_sysctl_table(kernel_root_table2);
 #endif
 	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	hotcpu_notifier(cpu_vsyscall_notifier, 0);
+	/* notifier priority > KVM */
+	hotcpu_notifier(cpu_vsyscall_notifier, 30);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3c4d0109..970bbd4 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select USER_RETURN_NOTIFIER
+	select KVM_MMIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7e8faea..4dade6a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -32,7 +32,7 @@
 #include <linux/module.h>
 #include <asm/kvm_emulate.h>
 
-#include "mmu.h"		/* for is_long_mode() */
+#include "x86.h"
 
 /*
  * Opcode effective-address decode tables.
@@ -76,6 +76,8 @@
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
 /* Misc flags */
+#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
+#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64	    (1<<28)
 /* Source 2 operand type */
 #define Src2None    (0<<29)
@@ -88,39 +90,40 @@
 enum {
 	Group1_80, Group1_81, Group1_82, Group1_83,
 	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+	Group8, Group9,
 };
 
 static u32 opcode_table[256] = {
 	/* 0x00 - 0x07 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x08 - 0x0F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
 	ImplicitOps | Stack | No64, 0,
 	/* 0x10 - 0x17 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x18 - 0x1F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
 	/* 0x20 - 0x27 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
 	/* 0x28 - 0x2F */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	0, 0, 0, 0,
 	/* 0x30 - 0x37 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
 	0, 0, 0, 0,
 	/* 0x38 - 0x3F */
@@ -156,7 +159,7 @@
 	Group | Group1_80, Group | Group1_81,
 	Group | Group1_82, Group | Group1_83,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
 	/* 0x88 - 0x8F */
 	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -210,7 +213,7 @@
 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
 	/* 0xF0 - 0xF7 */
 	0, 0, 0, 0,
-	ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
+	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
 	/* 0xF8 - 0xFF */
 	ImplicitOps, 0, ImplicitOps, ImplicitOps,
 	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
@@ -218,16 +221,20 @@
 
 static u32 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
-	0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
-	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
+	0, Group | GroupDual | Group7, 0, 0,
+	0, ImplicitOps, ImplicitOps | Priv, 0,
+	ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
+	0, ImplicitOps | ModRM, 0, 0,
 	/* 0x10 - 0x1F */
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x20 - 0x2F */
-	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
+	ModRM | ImplicitOps | Priv, ModRM | Priv,
+	ModRM | ImplicitOps | Priv, ModRM | Priv,
+	0, 0, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x30 - 0x3F */
-	ImplicitOps, 0, ImplicitOps, 0,
-	ImplicitOps, ImplicitOps, 0, 0,
+	ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
+	ImplicitOps, ImplicitOps | Priv, 0, 0,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0x40 - 0x47 */
 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -257,21 +264,23 @@
 	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
 	/* 0xA8 - 0xAF */
 	ImplicitOps | Stack, ImplicitOps | Stack,
-	0, DstMem | SrcReg | ModRM | BitOp,
+	0, DstMem | SrcReg | ModRM | BitOp | Lock,
 	DstMem | SrcReg | Src2ImmByte | ModRM,
 	DstMem | SrcReg | Src2CL | ModRM,
 	ModRM, 0,
 	/* 0xB0 - 0xB7 */
-	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
-	    DstMem | SrcReg | ModRM | BitOp,
+	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
+	0, DstMem | SrcReg | ModRM | BitOp | Lock,
 	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
 	    DstReg | SrcMem16 | ModRM | Mov,
 	/* 0xB8 - 0xBF */
-	0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
+	0, 0,
+	Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
 	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
 	    DstReg | SrcMem16 | ModRM | Mov,
 	/* 0xC0 - 0xCF */
-	0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+	0, 0, 0, DstMem | SrcReg | ModRM | Mov,
+	0, 0, 0, Group | GroupDual | Group9,
 	0, 0, 0, 0, 0, 0, 0, 0,
 	/* 0xD0 - 0xDF */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -283,25 +292,41 @@
 
 static u32 group_table[] = {
 	[Group1_80*8] =
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | Lock,
+	ByteOp | DstMem | SrcImm | ModRM,
 	[Group1_81*8] =
-	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM | Lock,
+	DstMem | SrcImm | ModRM,
 	[Group1_82*8] =
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+	ByteOp | DstMem | SrcImm | ModRM | No64,
 	[Group1_83*8] =
-	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM,
 	[Group1A*8] =
 	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
 	[Group3_Byte*8] =
@@ -320,24 +345,39 @@
 	SrcMem | ModRM | Stack, 0,
 	SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
 	[Group7*8] =
-	0, 0, ModRM | SrcMem, ModRM | SrcMem,
+	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
 	SrcNone | ModRM | DstMem | Mov, 0,
-	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+	SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
+	[Group8*8] =
+	0, 0, 0, 0,
+	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
+	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
+	[Group9*8] =
+	0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
 };
 
 static u32 group2_table[] = {
 	[Group7*8] =
-	SrcNone | ModRM, 0, 0, SrcNone | ModRM,
+	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
 	SrcNone | ModRM | DstMem | Mov, 0,
 	SrcMem16 | ModRM | Mov, 0,
+	[Group9*8] =
+	0, 0, 0, 0, 0, 0, 0, 0,
 };
 
 /* EFLAGS bit definitions. */
+#define EFLG_ID (1<<21)
+#define EFLG_VIP (1<<20)
+#define EFLG_VIF (1<<19)
+#define EFLG_AC (1<<18)
 #define EFLG_VM (1<<17)
 #define EFLG_RF (1<<16)
+#define EFLG_IOPL (3<<12)
+#define EFLG_NT (1<<14)
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
 #define EFLG_IF (1<<9)
+#define EFLG_TF (1<<8)
 #define EFLG_SF (1<<7)
 #define EFLG_ZF (1<<6)
 #define EFLG_AF (1<<4)
@@ -606,7 +646,7 @@
 
 	if (linear < fc->start || linear >= fc->end) {
 		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
-		rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+		rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
 		if (rc)
 			return rc;
 		fc->start = linear;
@@ -661,11 +701,11 @@
 		op_bytes = 3;
 	*address = 0;
 	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
-			   ctxt->vcpu);
+			   ctxt->vcpu, NULL);
 	if (rc)
 		return rc;
 	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
-			   ctxt->vcpu);
+			   ctxt->vcpu, NULL);
 	return rc;
 }
 
@@ -889,6 +929,7 @@
 
 	switch (mode) {
 	case X86EMUL_MODE_REAL:
+	case X86EMUL_MODE_VM86:
 	case X86EMUL_MODE_PROT16:
 		def_op_bytes = def_ad_bytes = 2;
 		break;
@@ -975,7 +1016,7 @@
 	}
 
 	if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
-		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
+		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
 		return -1;
 	}
 
@@ -1196,13 +1237,56 @@
 	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
 						 c->regs[VCPU_REGS_RSP]),
 				dest, len, ctxt->vcpu);
-	if (rc != 0)
+	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
 	return rc;
 }
 
+static int emulate_popf(struct x86_emulate_ctxt *ctxt,
+		       struct x86_emulate_ops *ops,
+		       void *dest, int len)
+{
+	int rc;
+	unsigned long val, change_mask;
+	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
+
+	rc = emulate_pop(ctxt, ops, &val, len);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
+		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+
+	switch(ctxt->mode) {
+	case X86EMUL_MODE_PROT64:
+	case X86EMUL_MODE_PROT32:
+	case X86EMUL_MODE_PROT16:
+		if (cpl == 0)
+			change_mask |= EFLG_IOPL;
+		if (cpl <= iopl)
+			change_mask |= EFLG_IF;
+		break;
+	case X86EMUL_MODE_VM86:
+		if (iopl < 3) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			return X86EMUL_PROPAGATE_FAULT;
+		}
+		change_mask |= EFLG_IF;
+		break;
+	default: /* real mode */
+		change_mask |= (EFLG_IOPL | EFLG_IF);
+		break;
+	}
+
+	*(unsigned long *)dest =
+		(ctxt->eflags & ~change_mask) | (val & change_mask);
+
+	return rc;
+}
+
 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
 {
 	struct decode_cache *c = &ctxt->decode;
@@ -1225,7 +1309,7 @@
 	if (rc != 0)
 		return rc;
 
-	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
 	return rc;
 }
 
@@ -1370,7 +1454,7 @@
 	int rc;
 
 	rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
-	if (rc != 0)
+	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@@ -1385,7 +1469,7 @@
 		       (u32) c->regs[VCPU_REGS_RBX];
 
 		rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
-		if (rc != 0)
+		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		ctxt->eflags |= EFLG_ZF;
 	}
@@ -1407,7 +1491,7 @@
 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
 	if (rc)
 		return rc;
-	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
 	return rc;
 }
 
@@ -1451,7 +1535,7 @@
 					&c->dst.val,
 					c->dst.bytes,
 					ctxt->vcpu);
-		if (rc != 0)
+		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
 	case OP_NONE:
@@ -1514,9 +1598,8 @@
 	u64 msr_data;
 
 	/* syscall is not available in real mode */
-	if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
-		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
-		return -1;
+	if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
+		return X86EMUL_UNHANDLEABLE;
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -1553,7 +1636,7 @@
 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
 	}
 
-	return 0;
+	return X86EMUL_CONTINUE;
 }
 
 static int
@@ -1563,22 +1646,17 @@
 	struct kvm_segment cs, ss;
 	u64 msr_data;
 
-	/* inject #UD if LOCK prefix is used */
-	if (c->lock_prefix)
-		return -1;
-
-	/* inject #GP if in real mode or paging is disabled */
-	if (ctxt->mode == X86EMUL_MODE_REAL ||
-		!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+	/* inject #GP if in real mode */
+	if (ctxt->mode == X86EMUL_MODE_REAL) {
 		kvm_inject_gp(ctxt->vcpu, 0);
-		return -1;
+		return X86EMUL_UNHANDLEABLE;
 	}
 
 	/* XXX sysenter/sysexit have not been tested in 64bit mode.
 	* Therefore, we inject an #UD.
 	*/
 	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		return -1;
+		return X86EMUL_UNHANDLEABLE;
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -1587,13 +1665,13 @@
 	case X86EMUL_MODE_PROT32:
 		if ((msr_data & 0xfffc) == 0x0) {
 			kvm_inject_gp(ctxt->vcpu, 0);
-			return -1;
+			return X86EMUL_PROPAGATE_FAULT;
 		}
 		break;
 	case X86EMUL_MODE_PROT64:
 		if (msr_data == 0x0) {
 			kvm_inject_gp(ctxt->vcpu, 0);
-			return -1;
+			return X86EMUL_PROPAGATE_FAULT;
 		}
 		break;
 	}
@@ -1618,7 +1696,7 @@
 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
 	c->regs[VCPU_REGS_RSP] = msr_data;
 
-	return 0;
+	return X86EMUL_CONTINUE;
 }
 
 static int
@@ -1629,21 +1707,11 @@
 	u64 msr_data;
 	int usermode;
 
-	/* inject #UD if LOCK prefix is used */
-	if (c->lock_prefix)
-		return -1;
-
-	/* inject #GP if in real mode or paging is disabled */
-	if (ctxt->mode == X86EMUL_MODE_REAL
-		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+	/* inject #GP if in real mode or Virtual 8086 mode */
+	if (ctxt->mode == X86EMUL_MODE_REAL ||
+	    ctxt->mode == X86EMUL_MODE_VM86) {
 		kvm_inject_gp(ctxt->vcpu, 0);
-		return -1;
-	}
-
-	/* sysexit must be called from CPL 0 */
-	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
-		kvm_inject_gp(ctxt->vcpu, 0);
-		return -1;
+		return X86EMUL_UNHANDLEABLE;
 	}
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1661,7 +1729,7 @@
 		cs.selector = (u16)(msr_data + 16);
 		if ((msr_data & 0xfffc) == 0x0) {
 			kvm_inject_gp(ctxt->vcpu, 0);
-			return -1;
+			return X86EMUL_PROPAGATE_FAULT;
 		}
 		ss.selector = (u16)(msr_data + 24);
 		break;
@@ -1669,7 +1737,7 @@
 		cs.selector = (u16)(msr_data + 32);
 		if (msr_data == 0x0) {
 			kvm_inject_gp(ctxt->vcpu, 0);
-			return -1;
+			return X86EMUL_PROPAGATE_FAULT;
 		}
 		ss.selector = cs.selector + 8;
 		cs.db = 0;
@@ -1685,7 +1753,58 @@
 	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
 	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
 
-	return 0;
+	return X86EMUL_CONTINUE;
+}
+
+static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
+{
+	int iopl;
+	if (ctxt->mode == X86EMUL_MODE_REAL)
+		return false;
+	if (ctxt->mode == X86EMUL_MODE_VM86)
+		return true;
+	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+	return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
+}
+
+static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
+					    struct x86_emulate_ops *ops,
+					    u16 port, u16 len)
+{
+	struct kvm_segment tr_seg;
+	int r;
+	u16 io_bitmap_ptr;
+	u8 perm, bit_idx = port & 0x7;
+	unsigned mask = (1 << len) - 1;
+
+	kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
+	if (tr_seg.unusable)
+		return false;
+	if (tr_seg.limit < 103)
+		return false;
+	r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
+			  NULL);
+	if (r != X86EMUL_CONTINUE)
+		return false;
+	if (io_bitmap_ptr + port/8 > tr_seg.limit)
+		return false;
+	r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
+			  ctxt->vcpu, NULL);
+	if (r != X86EMUL_CONTINUE)
+		return false;
+	if ((perm >> bit_idx) & mask)
+		return false;
+	return true;
+}
+
+static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
+				 struct x86_emulate_ops *ops,
+				 u16 port, u16 len)
+{
+	if (emulator_bad_iopl(ctxt))
+		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
+			return false;
+	return true;
 }
 
 int
@@ -1709,6 +1828,18 @@
 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
 	saved_eip = c->eip;
 
+	/* LOCK prefix is allowed only with some instructions */
+	if (c->lock_prefix && !(c->d & Lock)) {
+		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+		goto done;
+	}
+
+	/* Privileged instruction can be executed only in CPL=0 */
+	if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+		kvm_inject_gp(ctxt->vcpu, 0);
+		goto done;
+	}
+
 	if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
 		memop = c->modrm_ea;
 
@@ -1749,7 +1880,7 @@
 					&c->src.val,
 					c->src.bytes,
 					ctxt->vcpu);
-		if (rc != 0)
+		if (rc != X86EMUL_CONTINUE)
 			goto done;
 		c->src.orig_val = c->src.val;
 	}
@@ -1768,12 +1899,15 @@
 			c->dst.ptr = (void *)c->dst.ptr +
 						   (c->src.val & mask) / 8;
 		}
-		if (!(c->d & Mov) &&
-				   /* optimisation - avoid slow emulated read */
-		    ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-					   &c->dst.val,
-					  c->dst.bytes, ctxt->vcpu)) != 0))
-			goto done;
+		if (!(c->d & Mov)) {
+			/* optimisation - avoid slow emulated read */
+			rc = ops->read_emulated((unsigned long)c->dst.ptr,
+						&c->dst.val,
+						c->dst.bytes,
+						ctxt->vcpu);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+		}
 	}
 	c->dst.orig_val = c->dst.val;
 
@@ -1876,7 +2010,12 @@
 		break;
 	case 0x6c:		/* insb */
 	case 0x6d:		/* insw/insd */
-		 if (kvm_emulate_pio_string(ctxt->vcpu,
+		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			goto done;
+		}
+		if (kvm_emulate_pio_string(ctxt->vcpu,
 				1,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
 				c->rep_prefix ?
@@ -1892,6 +2031,11 @@
 		return 0;
 	case 0x6e:		/* outsb */
 	case 0x6f:		/* outsw/outsd */
+		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			goto done;
+		}
 		if (kvm_emulate_pio_string(ctxt->vcpu,
 				0,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
@@ -1978,25 +2122,19 @@
 		break;
 	case 0x8e: { /* mov seg, r/m16 */
 		uint16_t sel;
-		int type_bits;
-		int err;
 
 		sel = c->src.val;
+
+		if (c->modrm_reg == VCPU_SREG_CS ||
+		    c->modrm_reg > VCPU_SREG_GS) {
+			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+			goto done;
+		}
+
 		if (c->modrm_reg == VCPU_SREG_SS)
 			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
 
-		if (c->modrm_reg <= 5) {
-			type_bits = (c->modrm_reg == 1) ? 9 : 1;
-			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
-							  type_bits, c->modrm_reg);
-		} else {
-			printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
-					c->modrm);
-			goto cannot_emulate;
-		}
-
-		if (err < 0)
-			goto cannot_emulate;
+		rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
 
 		c->dst.type = OP_NONE;  /* Disable writeback. */
 		break;
@@ -2025,7 +2163,10 @@
 		c->dst.type = OP_REG;
 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
 		c->dst.bytes = c->op_bytes;
-		goto pop_instruction;
+		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+		break;
 	case 0xa0 ... 0xa1:	/* mov */
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
 		c->dst.val = c->src.val;
@@ -2039,11 +2180,12 @@
 		c->dst.ptr = (unsigned long *)register_address(c,
 						   es_base(ctxt),
 						   c->regs[VCPU_REGS_RDI]);
-		if ((rc = ops->read_emulated(register_address(c,
-					   seg_override_base(ctxt, c),
-					c->regs[VCPU_REGS_RSI]),
+		rc = ops->read_emulated(register_address(c,
+						seg_override_base(ctxt, c),
+						c->regs[VCPU_REGS_RSI]),
 					&c->dst.val,
-					c->dst.bytes, ctxt->vcpu)) != 0)
+					c->dst.bytes, ctxt->vcpu);
+		if (rc != X86EMUL_CONTINUE)
 			goto done;
 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2058,10 +2200,11 @@
 		c->src.ptr = (unsigned long *)register_address(c,
 				       seg_override_base(ctxt, c),
 						   c->regs[VCPU_REGS_RSI]);
-		if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
-						&c->src.val,
-						c->src.bytes,
-						ctxt->vcpu)) != 0)
+		rc = ops->read_emulated((unsigned long)c->src.ptr,
+					&c->src.val,
+					c->src.bytes,
+					ctxt->vcpu);
+		if (rc != X86EMUL_CONTINUE)
 			goto done;
 
 		c->dst.type = OP_NONE; /* Disable writeback. */
@@ -2069,10 +2212,11 @@
 		c->dst.ptr = (unsigned long *)register_address(c,
 						   es_base(ctxt),
 						   c->regs[VCPU_REGS_RDI]);
-		if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-						&c->dst.val,
-						c->dst.bytes,
-						ctxt->vcpu)) != 0)
+		rc = ops->read_emulated((unsigned long)c->dst.ptr,
+					&c->dst.val,
+					c->dst.bytes,
+					ctxt->vcpu);
+		if (rc != X86EMUL_CONTINUE)
 			goto done;
 
 		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@@ -2102,12 +2246,13 @@
 		c->dst.type = OP_REG;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-		if ((rc = ops->read_emulated(register_address(c,
-						 seg_override_base(ctxt, c),
-						 c->regs[VCPU_REGS_RSI]),
-						 &c->dst.val,
-						 c->dst.bytes,
-						 ctxt->vcpu)) != 0)
+		rc = ops->read_emulated(register_address(c,
+						seg_override_base(ctxt, c),
+						c->regs[VCPU_REGS_RSI]),
+					&c->dst.val,
+					c->dst.bytes,
+					ctxt->vcpu);
+		if (rc != X86EMUL_CONTINUE)
 			goto done;
 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2163,11 +2308,9 @@
 	case 0xe9: /* jmp rel */
 		goto jmp;
 	case 0xea: /* jmp far */
-		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
-					VCPU_SREG_CS) < 0) {
-			DPRINTF("jmp far: Failed to load CS descriptor\n");
-			goto cannot_emulate;
-		}
+		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
+						VCPU_SREG_CS))
+			goto done;
 
 		c->eip = c->src.val;
 		break;
@@ -2185,7 +2328,13 @@
 	case 0xef: /* out (e/r)ax,dx */
 		port = c->regs[VCPU_REGS_RDX];
 		io_dir_in = 0;
-	do_io:	if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
+	do_io:
+		if (!emulator_io_permited(ctxt, ops, port,
+					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
+			kvm_inject_gp(ctxt->vcpu, 0);
+			goto done;
+		}
+		if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
 				   (c->d & ByteOp) ? 1 : c->op_bytes,
 				   port) != 0) {
 			c->eip = saved_eip;
@@ -2210,13 +2359,21 @@
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
 	case 0xfa: /* cli */
-		ctxt->eflags &= ~X86_EFLAGS_IF;
-		c->dst.type = OP_NONE;	/* Disable writeback. */
+		if (emulator_bad_iopl(ctxt))
+			kvm_inject_gp(ctxt->vcpu, 0);
+		else {
+			ctxt->eflags &= ~X86_EFLAGS_IF;
+			c->dst.type = OP_NONE;	/* Disable writeback. */
+		}
 		break;
 	case 0xfb: /* sti */
-		toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
-		ctxt->eflags |= X86_EFLAGS_IF;
-		c->dst.type = OP_NONE;	/* Disable writeback. */
+		if (emulator_bad_iopl(ctxt))
+			kvm_inject_gp(ctxt->vcpu, 0);
+		else {
+			toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+			ctxt->eflags |= X86_EFLAGS_IF;
+			c->dst.type = OP_NONE;	/* Disable writeback. */
+		}
 		break;
 	case 0xfc: /* cld */
 		ctxt->eflags &= ~EFLG_DF;
@@ -2319,8 +2476,9 @@
 		}
 		break;
 	case 0x05: 		/* syscall */
-		if (emulate_syscall(ctxt) == -1)
-			goto cannot_emulate;
+		rc = emulate_syscall(ctxt);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
 		else
 			goto writeback;
 		break;
@@ -2391,14 +2549,16 @@
 		c->dst.type = OP_NONE;
 		break;
 	case 0x34:		/* sysenter */
-		if (emulate_sysenter(ctxt) == -1)
-			goto cannot_emulate;
+		rc = emulate_sysenter(ctxt);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
 		else
 			goto writeback;
 		break;
 	case 0x35:		/* sysexit */
-		if (emulate_sysexit(ctxt) == -1)
-			goto cannot_emulate;
+		rc = emulate_sysexit(ctxt);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
 		else
 			goto writeback;
 		break;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 15578f1..294698b 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -242,11 +242,11 @@
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
-	spin_lock(&ps->inject_lock);
+	raw_spin_lock(&ps->inject_lock);
 	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
 		atomic_inc(&ps->pit_timer.pending);
 	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
+	raw_spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -605,7 +605,7 @@
 	.write    = speaker_ioport_write,
 };
 
-/* Caller must have writers lock on slots_lock */
+/* Caller must hold slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
 	struct kvm_pit *pit;
@@ -624,7 +624,7 @@
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
+	raw_spin_lock_init(&pit->pit_state.inject_lock);
 
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
@@ -645,13 +645,13 @@
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
-		ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
 						&pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
@@ -660,11 +660,12 @@
 	return pit;
 
 fail_unregister:
-	__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 
 fail:
-	if (pit->irq_source_id >= 0)
-		kvm_free_irq_source_id(kvm, pit->irq_source_id);
+	kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+	kvm_free_irq_source_id(kvm, pit->irq_source_id);
 
 	kfree(pit);
 	return NULL;
@@ -723,12 +724,12 @@
 		/* Try to inject pending interrupts when
 		 * last one has been acked.
 		 */
-		spin_lock(&ps->inject_lock);
+		raw_spin_lock(&ps->inject_lock);
 		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
 			ps->irq_ack = 0;
 			inject = 1;
 		}
-		spin_unlock(&ps->inject_lock);
+		raw_spin_unlock(&ps->inject_lock);
 		if (inject)
 			__inject_pit_timer_intr(kvm);
 	}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index d4c1c7f..900d6b0 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -27,7 +27,7 @@
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	spinlock_t inject_lock;
+	raw_spinlock_t inject_lock;
 	unsigned long irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index d057c0c..07771da 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -44,18 +44,19 @@
 	 * Other interrupt may be delivered to PIC while lock is dropped but
 	 * it should be safe since PIC state is already updated at this stage.
 	 */
-	spin_unlock(&s->pics_state->lock);
+	raw_spin_unlock(&s->pics_state->lock);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
-	spin_lock(&s->pics_state->lock);
+	raw_spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
-	spin_lock(&s->lock);
+
+	raw_spin_lock(&s->lock);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 }
 
 /*
@@ -156,9 +157,9 @@
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	pic_update_irq(s);
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -166,14 +167,14 @@
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
 				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 
 	return ret;
 }
@@ -203,7 +204,7 @@
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -228,7 +229,7 @@
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 
 	return intno;
 }
@@ -442,7 +443,7 @@
 			printk(KERN_ERR "PIC: non byte write\n");
 		return 0;
 	}
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -455,7 +456,7 @@
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 	return 0;
 }
 
@@ -472,7 +473,7 @@
 			printk(KERN_ERR "PIC: non byte read\n");
 		return 0;
 	}
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -486,7 +487,7 @@
 		break;
 	}
 	*(unsigned char *)val = data;
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 	return 0;
 }
 
@@ -520,7 +521,7 @@
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
-	spin_lock_init(&s->lock);
+	raw_spin_lock_init(&s->lock);
 	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
@@ -533,7 +534,9 @@
 	 * Initialize PIO device
 	 */
 	kvm_iodevice_init(&s->dev, &picdev_ops);
-	ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0) {
 		kfree(s);
 		return NULL;
@@ -541,3 +544,14 @@
 
 	return s;
 }
+
+void kvm_destroy_pic(struct kvm *kvm)
+{
+	struct kvm_pic *vpic = kvm->arch.vpic;
+
+	if (vpic) {
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+		kvm->arch.vpic = NULL;
+		kfree(vpic);
+	}
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index be399e2..34b15915 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -62,7 +62,7 @@
 };
 
 struct kvm_pic {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@@ -75,6 +75,7 @@
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm *kvm);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 void kvm_pic_clear_isr_ack(struct kvm *kvm);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 7bcc5b6..cff851c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -1,6 +1,11 @@
 #ifndef ASM_KVM_CACHE_REGS_H
 #define ASM_KVM_CACHE_REGS_H
 
+#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR4_GUEST_BITS				  \
+	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
 					      enum kvm_reg reg)
 {
@@ -38,4 +43,30 @@
 	return vcpu->arch.pdptrs[index];
 }
 
+static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+	ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
+	if (tmask & vcpu->arch.cr0_guest_owned_bits)
+		kvm_x86_ops->decache_cr0_guest_bits(vcpu);
+	return vcpu->arch.cr0 & mask;
+}
+
+static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr0_bits(vcpu, ~0UL);
+}
+
+static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+	ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
+	if (tmask & vcpu->arch.cr4_guest_owned_bits)
+		kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+	return vcpu->arch.cr4 & mask;
+}
+
+static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr4_bits(vcpu, ~0UL);
+}
+
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ba8c045..4b224f9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1246,3 +1246,34 @@
 
 	return 0;
 }
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 1;
+
+	/* if this is ICR write vector before command */
+	if (reg == APIC_ICR)
+		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+	return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	u32 low, high = 0;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 1;
+
+	if (apic_reg_read(apic, reg, 4, &low))
+		return 1;
+	if (reg == APIC_ICR)
+		apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+	*data = (((u64)high) << 32) | low;
+
+	return 0;
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 40010b0..f5fe32c 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -48,4 +48,12 @@
 
 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+}
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 89a49fb..741373e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,6 +18,7 @@
  */
 
 #include "mmu.h"
+#include "x86.h"
 #include "kvm_cache_regs.h"
 
 #include <linux/kvm_host.h>
@@ -29,6 +30,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -136,16 +138,6 @@
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
 			| PT64_NX_MASK)
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-
 #define RMAP_EXT 4
 
 #define ACC_EXEC_MASK    1
@@ -153,6 +145,9 @@
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
@@ -229,7 +224,7 @@
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cr0 & X86_CR0_WP;
+	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
 static int is_cpuid_PSE36(void)
@@ -239,7 +234,7 @@
 
 static int is_nx(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.shadow_efer & EFER_NX;
+	return vcpu->arch.efer & EFER_NX;
 }
 
 static int is_shadow_present_pte(u64 pte)
@@ -253,7 +248,7 @@
 	return pte & PT_PAGE_SIZE_MASK;
 }
 
-static int is_writeble_pte(unsigned long pte)
+static int is_writable_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
 }
@@ -470,24 +465,10 @@
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-	unsigned long page_size = PAGE_SIZE;
-	struct vm_area_struct *vma;
-	unsigned long addr;
+	unsigned long page_size;
 	int i, ret = 0;
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return PT_PAGE_TABLE_LEVEL;
-
-	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, addr);
-	if (!vma)
-		goto out;
-
-	page_size = vma_kernel_pagesize(vma);
-
-out:
-	up_read(&current->mm->mmap_sem);
+	page_size = kvm_host_page_size(kvm, gfn);
 
 	for (i = PT_PAGE_TABLE_LEVEL;
 	     i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -503,8 +484,7 @@
 static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
 	struct kvm_memory_slot *slot;
-	int host_level;
-	int level = PT_PAGE_TABLE_LEVEL;
+	int host_level, level, max_level;
 
 	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
 	if (slot && slot->dirty_bitmap)
@@ -515,7 +495,10 @@
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
 
-	for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
+	max_level = kvm_x86_ops->get_lpage_level() < host_level ?
+		kvm_x86_ops->get_lpage_level() : host_level;
+
+	for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
 		if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
 			break;
 
@@ -633,7 +616,7 @@
 	pfn = spte_to_pfn(*spte);
 	if (*spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
-	if (is_writeble_pte(*spte))
+	if (is_writable_pte(*spte))
 		kvm_set_pfn_dirty(pfn);
 	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
 	if (!*rmapp) {
@@ -662,6 +645,7 @@
 			prev_desc = desc;
 			desc = desc->more;
 		}
+		pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
 		BUG();
 	}
 }
@@ -708,7 +692,7 @@
 		BUG_ON(!spte);
 		BUG_ON(!(*spte & PT_PRESENT_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		if (is_writeble_pte(*spte)) {
+		if (is_writable_pte(*spte)) {
 			__set_spte(spte, *spte & ~PT_WRITABLE_MASK);
 			write_protected = 1;
 		}
@@ -732,7 +716,7 @@
 			BUG_ON(!(*spte & PT_PRESENT_MASK));
 			BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
 			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-			if (is_writeble_pte(*spte)) {
+			if (is_writable_pte(*spte)) {
 				rmap_remove(kvm, spte);
 				--kvm->stat.lpages;
 				__set_spte(spte, shadow_trap_nonpresent_pte);
@@ -787,7 +771,7 @@
 
 			new_spte &= ~PT_WRITABLE_MASK;
 			new_spte &= ~SPTE_HOST_WRITEABLE;
-			if (is_writeble_pte(*spte))
+			if (is_writable_pte(*spte))
 				kvm_set_pfn_dirty(spte_to_pfn(*spte));
 			__set_spte(spte, new_spte);
 			spte = rmap_next(kvm, rmapp, spte);
@@ -805,35 +789,32 @@
 					 unsigned long data))
 {
 	int i, j;
+	int ret;
 	int retval = 0;
+	struct kvm_memslots *slots;
 
-	/*
-	 * If mmap_sem isn't taken, we can look the memslots with only
-	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
-	 */
-	for (i = 0; i < kvm->nmemslots; i++) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	slots = rcu_dereference(kvm->memslots);
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 		unsigned long start = memslot->userspace_addr;
 		unsigned long end;
 
-		/* mmu_lock protects userspace_addr */
-		if (!start)
-			continue;
-
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-			retval |= handler(kvm, &memslot->rmap[gfn_offset],
-					  data);
+			ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
 			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
 				int idx = gfn_offset;
 				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
-				retval |= handler(kvm,
+				ret |= handler(kvm,
 					&memslot->lpage_info[j][idx].rmap_pde,
 					data);
 			}
+			trace_kvm_age_page(hva, memslot, ret);
+			retval |= ret;
 		}
 	}
 
@@ -856,9 +837,15 @@
 	u64 *spte;
 	int young = 0;
 
-	/* always return old for EPT */
+	/*
+	 * Emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
 	if (!shadow_accessed_mask)
-		return 0;
+		return kvm_unmap_rmapp(kvm, rmapp, data);
 
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
@@ -1615,7 +1602,7 @@
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+	int slot = memslot_id(kvm, gfn);
 	struct kvm_mmu_page *sp = page_header(__pa(pte));
 
 	__set_bit(slot, sp->slot_bitmap);
@@ -1639,7 +1626,7 @@
 {
 	struct page *page;
 
-	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+	gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
@@ -1852,7 +1839,7 @@
 		 * is responsibility of mmu_get_page / kvm_sync_page.
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
-		if (!can_unsync && is_writeble_pte(*sptep))
+		if (!can_unsync && is_writable_pte(*sptep))
 			goto set_pte;
 
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1860,7 +1847,7 @@
 				 __func__, gfn);
 			ret = 1;
 			pte_access &= ~ACC_WRITE_MASK;
-			if (is_writeble_pte(spte))
+			if (is_writable_pte(spte))
 				spte &= ~PT_WRITABLE_MASK;
 		}
 	}
@@ -1881,7 +1868,7 @@
 			 bool reset_host_protection)
 {
 	int was_rmapped = 0;
-	int was_writeble = is_writeble_pte(*sptep);
+	int was_writable = is_writable_pte(*sptep);
 	int rmap_count;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1932,7 +1919,7 @@
 		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
 			rmap_recycle(vcpu, sptep, gfn);
 	} else {
-		if (was_writeble)
+		if (was_writable)
 			kvm_release_pfn_dirty(pfn);
 		else
 			kvm_release_pfn_clean(pfn);
@@ -2162,8 +2149,11 @@
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+				  u32 access, u32 *error)
 {
+	if (error)
+		*error = 0;
 	return vaddr;
 }
 
@@ -2747,7 +2737,7 @@
 	if (tdp_enabled)
 		return 0;
 
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2847,16 +2837,13 @@
 	 */
 	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
 	if (!page)
-		goto error_1;
+		return -ENOMEM;
+
 	vcpu->arch.mmu.pae_root = page_address(page);
 	for (i = 0; i < 4; ++i)
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 
 	return 0;
-
-error_1:
-	free_mmu_pages(vcpu);
-	return -ENOMEM;
 }
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -2936,10 +2923,9 @@
 	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
-		int npages;
+		int npages, idx;
 
-		if (!down_read_trylock(&kvm->slots_lock))
-			continue;
+		idx = srcu_read_lock(&kvm->srcu);
 		spin_lock(&kvm->mmu_lock);
 		npages = kvm->arch.n_alloc_mmu_pages -
 			 kvm->arch.n_free_mmu_pages;
@@ -2952,7 +2938,7 @@
 		nr_to_scan--;
 
 		spin_unlock(&kvm->mmu_lock);
-		up_read(&kvm->slots_lock);
+		srcu_read_unlock(&kvm->srcu, idx);
 	}
 	if (kvm_freed)
 		list_move_tail(&kvm_freed->vm_list, &vm_list);
@@ -3019,9 +3005,11 @@
 	int i;
 	unsigned int nr_mmu_pages;
 	unsigned int  nr_pages = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++)
-		nr_pages += kvm->memslots[i].npages;
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++)
+		nr_pages += slots->memslots[i].npages;
 
 	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
 	nr_mmu_pages = max(nr_mmu_pages,
@@ -3246,7 +3234,7 @@
 		if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
 			audit_mappings_page(vcpu, ent, va, level - 1);
 		else {
-			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+			gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
 			gfn_t gfn = gpa >> PAGE_SHIFT;
 			pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
 			hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@@ -3291,10 +3279,12 @@
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
 	int nmaps = 0;
-	int i, j, k;
+	int i, j, k, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
+	slots = rcu_dereference(kvm->memslots);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_memory_slot *m = &slots->memslots[i];
 		struct kvm_rmap_desc *d;
 
 		for (j = 0; j < m->npages; ++j) {
@@ -3317,6 +3307,7 @@
 			}
 		}
 	}
+	srcu_read_unlock(&kvm->srcu, idx);
 	return nmaps;
 }
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 61a1b38..be66759 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -2,6 +2,7 @@
 #define __KVM_X86_MMU_H
 
 #include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
 
 #define PT64_PT_BITS 9
 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@@ -37,6 +38,16 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+#define PT_PDPE_LEVEL 3
+#define PT_DIRECTORY_LEVEL 2
+#define PT_PAGE_TABLE_LEVEL 1
+
+#define PFERR_PRESENT_MASK (1U << 0)
+#define PFERR_WRITE_MASK (1U << 1)
+#define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
+#define PFERR_FETCH_MASK (1U << 4)
+
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -53,30 +64,6 @@
 	return kvm_mmu_load(vcpu);
 }
 
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
-	return vcpu->arch.shadow_efer & EFER_LMA;
-#else
-	return 0;
-#endif
-}
-
-static inline int is_pae(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.cr4 & X86_CR4_PAE;
-}
-
-static inline int is_pse(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.cr4 & X86_CR4_PSE;
-}
-
-static inline int is_paging(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.cr0 & X86_CR0_PG;
-}
-
 static inline int is_present_gpte(unsigned long pte)
 {
 	return pte & PT_PRESENT_MASK;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ede2131..81eab9a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -162,7 +162,7 @@
 		if (rsvd_fault)
 			goto access_error;
 
-		if (write_fault && !is_writeble_pte(pte))
+		if (write_fault && !is_writable_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
 
@@ -490,18 +490,23 @@
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+			       u32 *error)
 {
 	struct guest_walker walker;
 	gpa_t gpa = UNMAPPED_GVA;
 	int r;
 
-	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr,
+			     !!(access & PFERR_WRITE_MASK),
+			     !!(access & PFERR_USER_MASK),
+			     !!(access & PFERR_FETCH_MASK));
 
 	if (r) {
 		gpa = gfn_to_gpa(walker.gfn);
 		gpa |= vaddr & ~PAGE_MASK;
-	}
+	} else if (error)
+		*error = walker.error_code;
 
 	return gpa;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d9b338..52f78dd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -231,7 +231,7 @@
 		efer &= ~EFER_LME;
 
 	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
-	vcpu->arch.shadow_efer = efer;
+	vcpu->arch.efer = efer;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -540,6 +540,8 @@
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
 
+	svm->vcpu.fpu_active = 1;
+
 	control->intercept_cr_read = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
 					INTERCEPT_CR4_MASK;
@@ -552,13 +554,19 @@
 	control->intercept_dr_read = 	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
 					INTERCEPT_DR2_MASK |
-					INTERCEPT_DR3_MASK;
+					INTERCEPT_DR3_MASK |
+					INTERCEPT_DR4_MASK |
+					INTERCEPT_DR5_MASK |
+					INTERCEPT_DR6_MASK |
+					INTERCEPT_DR7_MASK;
 
 	control->intercept_dr_write = 	INTERCEPT_DR0_MASK |
 					INTERCEPT_DR1_MASK |
 					INTERCEPT_DR2_MASK |
 					INTERCEPT_DR3_MASK |
+					INTERCEPT_DR4_MASK |
 					INTERCEPT_DR5_MASK |
+					INTERCEPT_DR6_MASK |
 					INTERCEPT_DR7_MASK;
 
 	control->intercept_exceptions = (1 << PF_VECTOR) |
@@ -569,6 +577,7 @@
 	control->intercept = 	(1ULL << INTERCEPT_INTR) |
 				(1ULL << INTERCEPT_NMI) |
 				(1ULL << INTERCEPT_SMI) |
+				(1ULL << INTERCEPT_SELECTIVE_CR0) |
 				(1ULL << INTERCEPT_CPUID) |
 				(1ULL << INTERCEPT_INVD) |
 				(1ULL << INTERCEPT_HLT) |
@@ -641,10 +650,8 @@
 		control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
 					(1ULL << INTERCEPT_INVLPG));
 		control->intercept_exceptions &= ~(1 << PF_VECTOR);
-		control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
-						INTERCEPT_CR3_MASK);
-		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
-						 INTERCEPT_CR3_MASK);
+		control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
+		control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
 		save->g_pat = 0x0007040600070406ULL;
 		save->cr3 = 0;
 		save->cr4 = 0;
@@ -730,7 +737,6 @@
 	init_vmcb(svm);
 
 	fx_init(&svm->vcpu);
-	svm->vcpu.fpu_active = 1;
 	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (kvm_vcpu_is_bsp(&svm->vcpu))
 		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -765,14 +771,16 @@
 	if (unlikely(cpu != vcpu->cpu)) {
 		u64 delta;
 
-		/*
-		 * Make sure that the guest sees a monotonically
-		 * increasing TSC.
-		 */
-		delta = vcpu->arch.host_tsc - native_read_tsc();
-		svm->vmcb->control.tsc_offset += delta;
-		if (is_nested(svm))
-			svm->nested.hsave->control.tsc_offset += delta;
+		if (check_tsc_unstable()) {
+			/*
+			 * Make sure that the guest sees a monotonically
+			 * increasing TSC.
+			 */
+			delta = vcpu->arch.host_tsc - native_read_tsc();
+			svm->vmcb->control.tsc_offset += delta;
+			if (is_nested(svm))
+				svm->nested.hsave->control.tsc_offset += delta;
+		}
 		vcpu->cpu = cpu;
 		kvm_migrate_timers(vcpu);
 		svm->asid_generation = 0;
@@ -954,42 +962,59 @@
 	svm->vmcb->save.gdtr.base = dt->base ;
 }
 
+static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
+static void update_cr0_intercept(struct vcpu_svm *svm)
+{
+	ulong gcr0 = svm->vcpu.arch.cr0;
+	u64 *hcr0 = &svm->vmcb->save.cr0;
+
+	if (!svm->vcpu.fpu_active)
+		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
+	else
+		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
+			| (gcr0 & SVM_CR0_SELECTIVE_MASK);
+
+
+	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
+		svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+	} else {
+		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+	}
+}
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-			vcpu->arch.shadow_efer |= EFER_LMA;
+			vcpu->arch.efer |= EFER_LMA;
 			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
 		}
 
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
-			vcpu->arch.shadow_efer &= ~EFER_LMA;
+			vcpu->arch.efer &= ~EFER_LMA;
 			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
 		}
 	}
 #endif
-	if (npt_enabled)
-		goto set;
-
-	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-		vcpu->fpu_active = 1;
-	}
-
 	vcpu->arch.cr0 = cr0;
-	cr0 |= X86_CR0_PG | X86_CR0_WP;
-	if (!vcpu->fpu_active) {
-		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+
+	if (!npt_enabled)
+		cr0 |= X86_CR0_PG | X86_CR0_WP;
+
+	if (!vcpu->fpu_active)
 		cr0 |= X86_CR0_TS;
-	}
-set:
 	/*
 	 * re-enable caching here because the QEMU bios
 	 * does not do it - this results in some delay at
@@ -997,6 +1022,7 @@
 	 */
 	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
+	update_cr0_intercept(svm);
 }
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1102,76 +1128,70 @@
 	svm->vmcb->control.asid = sd->next_asid++;
 }
 
-static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
+static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	unsigned long val;
 
 	switch (dr) {
 	case 0 ... 3:
-		val = vcpu->arch.db[dr];
+		*dest = vcpu->arch.db[dr];
 		break;
+	case 4:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 6:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-			val = vcpu->arch.dr6;
+			*dest = vcpu->arch.dr6;
 		else
-			val = svm->vmcb->save.dr6;
+			*dest = svm->vmcb->save.dr6;
 		break;
+	case 5:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 7:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-			val = vcpu->arch.dr7;
+			*dest = vcpu->arch.dr7;
 		else
-			val = svm->vmcb->save.dr7;
+			*dest = svm->vmcb->save.dr7;
 		break;
-	default:
-		val = 0;
 	}
 
-	return val;
+	return EMULATE_DONE;
 }
 
-static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
-		       int *exception)
+static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	*exception = 0;
-
 	switch (dr) {
 	case 0 ... 3:
 		vcpu->arch.db[dr] = value;
 		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
 			vcpu->arch.eff_db[dr] = value;
-		return;
-	case 4 ... 5:
-		if (vcpu->arch.cr4 & X86_CR4_DE)
-			*exception = UD_VECTOR;
-		return;
+		break;
+	case 4:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 6:
-		if (value & 0xffffffff00000000ULL) {
-			*exception = GP_VECTOR;
-			return;
-		}
 		vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
-		return;
+		break;
+	case 5:
+		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+			return EMULATE_FAIL; /* will re-inject UD */
+		/* fall through */
 	case 7:
-		if (value & 0xffffffff00000000ULL) {
-			*exception = GP_VECTOR;
-			return;
-		}
 		vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
 		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
 			svm->vmcb->save.dr7 = vcpu->arch.dr7;
 			vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
 		}
-		return;
-	default:
-		/* FIXME: Possible case? */
-		printk(KERN_DEBUG "%s: unexpected dr %u\n",
-		       __func__, dr);
-		*exception = UD_VECTOR;
-		return;
+		break;
 	}
+
+	return EMULATE_DONE;
 }
 
 static int pf_interception(struct vcpu_svm *svm)
@@ -1239,13 +1259,17 @@
 	return 1;
 }
 
+static void svm_fpu_activate(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
+	svm->vcpu.fpu_active = 1;
+	update_cr0_intercept(svm);
+}
+
 static int nm_interception(struct vcpu_svm *svm)
 {
-	svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-	if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
-		svm->vmcb->save.cr0 &= ~X86_CR0_TS;
-	svm->vcpu.fpu_active = 1;
-
+	svm_fpu_activate(&svm->vcpu);
 	return 1;
 }
 
@@ -1337,7 +1361,7 @@
 
 static int nested_svm_check_permissions(struct vcpu_svm *svm)
 {
-	if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+	if (!(svm->vcpu.arch.efer & EFER_SVME)
 	    || !is_paging(&svm->vcpu)) {
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 		return 1;
@@ -1740,8 +1764,8 @@
 	hsave->save.ds     = vmcb->save.ds;
 	hsave->save.gdtr   = vmcb->save.gdtr;
 	hsave->save.idtr   = vmcb->save.idtr;
-	hsave->save.efer   = svm->vcpu.arch.shadow_efer;
-	hsave->save.cr0    = svm->vcpu.arch.cr0;
+	hsave->save.efer   = svm->vcpu.arch.efer;
+	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
 	hsave->save.cr4    = svm->vcpu.arch.cr4;
 	hsave->save.rflags = vmcb->save.rflags;
 	hsave->save.rip    = svm->next_rip;
@@ -2153,9 +2177,10 @@
 	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
 	u64 data;
 
-	if (svm_get_msr(&svm->vcpu, ecx, &data))
+	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(&svm->vcpu, 0);
-	else {
+	} else {
 		trace_kvm_msr_read(ecx, data);
 
 		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
@@ -2247,13 +2272,15 @@
 	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	trace_kvm_msr_write(ecx, data);
 
 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, ecx, data))
+	if (svm_set_msr(&svm->vcpu, ecx, data)) {
+		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
-	else
+	} else {
+		trace_kvm_msr_write(ecx, data);
 		skip_emulated_instruction(&svm->vcpu);
+	}
 	return 1;
 }
 
@@ -2297,7 +2324,7 @@
 	[SVM_EXIT_READ_CR3]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR4]           		= emulate_on_interception,
 	[SVM_EXIT_READ_CR8]           		= emulate_on_interception,
-	/* for now: */
+	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR0]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR3]          		= emulate_on_interception,
 	[SVM_EXIT_WRITE_CR4]          		= emulate_on_interception,
@@ -2306,11 +2333,17 @@
 	[SVM_EXIT_READ_DR1]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR2]			= emulate_on_interception,
 	[SVM_EXIT_READ_DR3]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR4]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR5]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR6]			= emulate_on_interception,
+	[SVM_EXIT_READ_DR7]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR0]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR1]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR2]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
+	[SVM_EXIT_WRITE_DR4]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
+	[SVM_EXIT_WRITE_DR6]			= emulate_on_interception,
 	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
 	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
 	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
@@ -2383,20 +2416,10 @@
 
 	svm_complete_interrupts(svm);
 
-	if (npt_enabled) {
-		int mmu_reload = 0;
-		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
-			svm_set_cr0(vcpu, svm->vmcb->save.cr0);
-			mmu_reload = 1;
-		}
+	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
 		vcpu->arch.cr0 = svm->vmcb->save.cr0;
+	if (npt_enabled)
 		vcpu->arch.cr3 = svm->vmcb->save.cr3;
-		if (mmu_reload) {
-			kvm_mmu_reset_context(vcpu);
-			kvm_mmu_load(vcpu);
-		}
-	}
-
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2798,12 +2821,6 @@
 
 	svm->vmcb->save.cr3 = root;
 	force_new_asid(vcpu);
-
-	if (vcpu->fpu_active) {
-		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
-		svm->vmcb->save.cr0 |= X86_CR0_TS;
-		vcpu->fpu_active = 0;
-	}
 }
 
 static int is_disabled(void)
@@ -2852,6 +2869,10 @@
 	return 0;
 }
 
+static void svm_cpuid_update(struct kvm_vcpu *vcpu)
+{
+}
+
 static const struct trace_print_flags svm_exit_reasons_str[] = {
 	{ SVM_EXIT_READ_CR0,           		"read_cr0" },
 	{ SVM_EXIT_READ_CR3,	      		"read_cr3" },
@@ -2905,9 +2926,22 @@
 	{ -1, NULL }
 };
 
-static bool svm_gb_page_enable(void)
+static int svm_get_lpage_level(void)
 {
-	return true;
+	return PT_PDPE_LEVEL;
+}
+
+static bool svm_rdtscp_supported(void)
+{
+	return false;
+}
+
+static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	update_cr0_intercept(svm);
+	svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
 }
 
 static struct kvm_x86_ops svm_x86_ops = {
@@ -2936,6 +2970,7 @@
 	.set_segment = svm_set_segment,
 	.get_cpl = svm_get_cpl,
 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
+	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
 	.set_cr0 = svm_set_cr0,
 	.set_cr3 = svm_set_cr3,
@@ -2950,6 +2985,8 @@
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+	.fpu_activate = svm_fpu_activate,
+	.fpu_deactivate = svm_fpu_deactivate,
 
 	.tlb_flush = svm_flush_tlb,
 
@@ -2975,7 +3012,11 @@
 	.get_mt_mask = svm_get_mt_mask,
 
 	.exit_reasons_str = svm_exit_reasons_str,
-	.gb_page_enable = svm_gb_page_enable,
+	.get_lpage_level = svm_get_lpage_level,
+
+	.cpuid_update = svm_cpuid_update,
+
+	.rdtscp_supported = svm_rdtscp_supported,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 816e044..6ad30a2 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -56,6 +56,38 @@
 );
 
 /*
+ * Tracepoint for hypercall.
+ */
+TRACE_EVENT(kvm_hv_hypercall,
+	TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
+		 __u64 ingpa, __u64 outgpa),
+	TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
+
+	TP_STRUCT__entry(
+		__field(	__u16, 		code		)
+		__field(	bool,		fast		)
+		__field(	__u16,		rep_cnt		)
+		__field(	__u16,		rep_idx		)
+		__field(	__u64,		ingpa		)
+		__field(	__u64,		outgpa		)
+	),
+
+	TP_fast_assign(
+		__entry->code		= code;
+		__entry->fast		= fast;
+		__entry->rep_cnt	= rep_cnt;
+		__entry->rep_idx	= rep_idx;
+		__entry->ingpa		= ingpa;
+		__entry->outgpa		= outgpa;
+	),
+
+	TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
+		  __entry->code, __entry->fast ? "fast" : "slow",
+		  __entry->rep_cnt, __entry->rep_idx,  __entry->ingpa,
+		  __entry->outgpa)
+);
+
+/*
  * Tracepoint for PIO.
  */
 TRACE_EVENT(kvm_pio,
@@ -214,28 +246,33 @@
  * Tracepoint for guest MSR access.
  */
 TRACE_EVENT(kvm_msr,
-	TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
-	TP_ARGS(rw, ecx, data),
+	TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception),
+	TP_ARGS(write, ecx, data, exception),
 
 	TP_STRUCT__entry(
-		__field(	unsigned int,	rw		)
-		__field(	unsigned int,	ecx		)
-		__field(	unsigned long,	data		)
+		__field(	unsigned,	write		)
+		__field(	u32,		ecx		)
+		__field(	u64,		data		)
+		__field(	u8,		exception	)
 	),
 
 	TP_fast_assign(
-		__entry->rw		= rw;
+		__entry->write		= write;
 		__entry->ecx		= ecx;
 		__entry->data		= data;
+		__entry->exception	= exception;
 	),
 
-	TP_printk("msr_%s %x = 0x%lx",
-		  __entry->rw ? "write" : "read",
-		  __entry->ecx, __entry->data)
+	TP_printk("msr_%s %x = 0x%llx%s",
+		  __entry->write ? "write" : "read",
+		  __entry->ecx, __entry->data,
+		  __entry->exception ? " (#GP)" : "")
 );
 
-#define trace_kvm_msr_read(ecx, data)		trace_kvm_msr(0, ecx, data)
-#define trace_kvm_msr_write(ecx, data)		trace_kvm_msr(1, ecx, data)
+#define trace_kvm_msr_read(ecx, data)      trace_kvm_msr(0, ecx, data, false)
+#define trace_kvm_msr_write(ecx, data)     trace_kvm_msr(1, ecx, data, false)
+#define trace_kvm_msr_read_ex(ecx)         trace_kvm_msr(0, ecx, 0, true)
+#define trace_kvm_msr_write_ex(ecx, data)  trace_kvm_msr(1, ecx, data, true)
 
 /*
  * Tracepoint for guest CR access.
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d4918d6..14873b9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,21 @@
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
+#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
+#define KVM_GUEST_CR0_MASK						\
+	(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST				\
+	(X86_CR0_WP | X86_CR0_NE)
+#define KVM_VM_CR0_ALWAYS_ON						\
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_CR4_GUEST_OWNED_BITS				      \
+	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
+	 | X86_CR4_OSXMMEXCPT)
+
+#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
+#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -136,6 +151,8 @@
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	bool rdtscp_enabled;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -210,7 +227,7 @@
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
-	MSR_EFER, MSR_K6_STAR,
+	MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
@@ -301,6 +318,11 @@
 	return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
 }
 
+static inline bool cpu_has_vmx_ept_1g_page(void)
+{
+	return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
 	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -336,9 +358,7 @@
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
-	return flexpriority_enabled &&
-		(cpu_has_vmx_virtualize_apic_accesses()) &&
-		(irqchip_in_kernel(kvm));
+	return flexpriority_enabled && irqchip_in_kernel(kvm);
 }
 
 static inline int cpu_has_vmx_vpid(void)
@@ -347,6 +367,12 @@
 		SECONDARY_EXEC_ENABLE_VPID;
 }
 
+static inline int cpu_has_vmx_rdtscp(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_RDTSCP;
+}
+
 static inline int cpu_has_virtual_nmis(void)
 {
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -551,22 +577,18 @@
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
-	if (!vcpu->fpu_active)
-		eb |= 1u << NM_VECTOR;
-	/*
-	 * Unconditionally intercept #DB so we can maintain dr6 without
-	 * reading it every exit.
-	 */
-	eb |= 1u << DB_VECTOR;
-	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-			eb |= 1u << BP_VECTOR;
-	}
+	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
+	     (1u << NM_VECTOR) | (1u << DB_VECTOR);
+	if ((vcpu->guest_debug &
+	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
+	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
+		eb |= 1u << BP_VECTOR;
 	if (to_vmx(vcpu)->rmode.vm86_active)
 		eb = ~0;
 	if (enable_ept)
 		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+	if (vcpu->fpu_active)
+		eb &= ~(1u << NM_VECTOR);
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -589,7 +611,7 @@
 	u64 guest_efer;
 	u64 ignore_bits;
 
-	guest_efer = vmx->vcpu.arch.shadow_efer;
+	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
 	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -767,22 +789,30 @@
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
 {
+	ulong cr0;
+
 	if (vcpu->fpu_active)
 		return;
 	vcpu->fpu_active = 1;
-	vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
-	if (vcpu->arch.cr0 & X86_CR0_TS)
-		vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+	cr0 = vmcs_readl(GUEST_CR0);
+	cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
+	cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
+	vmcs_writel(GUEST_CR0, cr0);
 	update_exception_bitmap(vcpu);
+	vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
+static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
+
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->fpu_active)
-		return;
-	vcpu->fpu_active = 0;
-	vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
+	vmx_decache_cr0_guest_bits(vcpu);
+	vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
 	update_exception_bitmap(vcpu);
+	vcpu->arch.cr0_guest_owned_bits = 0;
+	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
+	vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 }
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -878,6 +908,11 @@
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
+static bool vmx_rdtscp_supported(void)
+{
+	return cpu_has_vmx_rdtscp();
+}
+
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -913,12 +948,15 @@
 		index = __find_msr_index(vmx, MSR_CSTAR);
 		if (index >= 0)
 			move_msr_up(vmx, index, save_nmsrs++);
+		index = __find_msr_index(vmx, MSR_TSC_AUX);
+		if (index >= 0 && vmx->rdtscp_enabled)
+			move_msr_up(vmx, index, save_nmsrs++);
 		/*
 		 * MSR_K6_STAR is only needed on long mode guests, and only
 		 * if efer.sce is enabled.
 		 */
 		index = __find_msr_index(vmx, MSR_K6_STAR);
-		if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE))
+		if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
 			move_msr_up(vmx, index, save_nmsrs++);
 	}
 #endif
@@ -1002,6 +1040,10 @@
 	case MSR_IA32_SYSENTER_ESP:
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
+	case MSR_TSC_AUX:
+		if (!to_vmx(vcpu)->rdtscp_enabled)
+			return 1;
+		/* Otherwise falls through */
 	default:
 		vmx_load_host_state(to_vmx(vcpu));
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
@@ -1065,7 +1107,15 @@
 			vcpu->arch.pat = data;
 			break;
 		}
-		/* Otherwise falls through to kvm_set_msr_common */
+		ret = kvm_set_msr_common(vcpu, msr_index, data);
+		break;
+	case MSR_TSC_AUX:
+		if (!vmx->rdtscp_enabled)
+			return 1;
+		/* Check reserved bit, higher 32 bits should be zero */
+		if ((data >> 32) != 0)
+			return 1;
+		/* Otherwise falls through */
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -1224,6 +1274,8 @@
 	      CPU_BASED_USE_IO_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING |
+	      CPU_BASED_MWAIT_EXITING |
+	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING;
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_USE_MSR_BITMAPS |
@@ -1243,7 +1295,8 @@
 			SECONDARY_EXEC_ENABLE_VPID |
 			SECONDARY_EXEC_ENABLE_EPT |
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
-			SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
+			SECONDARY_EXEC_RDTSCP;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -1457,8 +1510,12 @@
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
 	if (!kvm->arch.tss_addr) {
-		gfn_t base_gfn = kvm->memslots[0].base_gfn +
-				 kvm->memslots[0].npages - 3;
+		struct kvm_memslots *slots;
+		gfn_t base_gfn;
+
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
+				 kvm->memslots->memslots[0].npages - 3;
 		return base_gfn << PAGE_SHIFT;
 	}
 	return kvm->arch.tss_addr;
@@ -1544,9 +1601,7 @@
 	 * of this msr depends on is_long_mode().
 	 */
 	vmx_load_host_state(to_vmx(vcpu));
-	vcpu->arch.shadow_efer = efer;
-	if (!msr)
-		return;
+	vcpu->arch.efer = efer;
 	if (efer & EFER_LMA) {
 		vmcs_write32(VM_ENTRY_CONTROLS,
 			     vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1576,13 +1631,13 @@
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
 	}
-	vcpu->arch.shadow_efer |= EFER_LMA;
-	vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
+	vcpu->arch.efer |= EFER_LMA;
+	vmx_set_efer(vcpu, vcpu->arch.efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.shadow_efer &= ~EFER_LMA;
+	vcpu->arch.efer &= ~EFER_LMA;
 
 	vmcs_write32(VM_ENTRY_CONTROLS,
 		     vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1598,10 +1653,20 @@
 		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
 }
 
+static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
+
+	vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
+	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
-	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
+	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
+
+	vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
+	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
 }
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -1646,7 +1711,7 @@
 			     (CPU_BASED_CR3_LOAD_EXITING |
 			      CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
 	} else if (!is_paging(vcpu)) {
 		/* From nonpaging to paging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1654,23 +1719,13 @@
 			     ~(CPU_BASED_CR3_LOAD_EXITING |
 			       CPU_BASED_CR3_STORE_EXITING));
 		vcpu->arch.cr0 = cr0;
-		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
 	}
 
 	if (!(cr0 & X86_CR0_WP))
 		*hw_cr0 &= ~X86_CR0_WP;
 }
 
-static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
-					struct kvm_vcpu *vcpu)
-{
-	if (!is_paging(vcpu)) {
-		*hw_cr4 &= ~X86_CR4_PAE;
-		*hw_cr4 |= X86_CR4_PSE;
-	} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
-		*hw_cr4 &= ~X86_CR4_PAE;
-}
-
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1682,8 +1737,6 @@
 	else
 		hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
-	vmx_fpu_deactivate(vcpu);
-
 	if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
 		enter_pmode(vcpu);
 
@@ -1691,7 +1744,7 @@
 		enter_rmode(vcpu);
 
 #ifdef CONFIG_X86_64
-	if (vcpu->arch.shadow_efer & EFER_LME) {
+	if (vcpu->arch.efer & EFER_LME) {
 		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
 			enter_lmode(vcpu);
 		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1702,12 +1755,12 @@
 	if (enable_ept)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
+	if (!vcpu->fpu_active)
+		hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
 	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
-
-	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
-		vmx_fpu_activate(vcpu);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
@@ -1738,8 +1791,6 @@
 
 	vmx_flush_tlb(vcpu);
 	vmcs_writel(GUEST_CR3, guest_cr3);
-	if (vcpu->arch.cr0 & X86_CR0_PE)
-		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1748,8 +1799,14 @@
 		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
 	vcpu->arch.cr4 = cr4;
-	if (enable_ept)
-		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
+	if (enable_ept) {
+		if (!is_paging(vcpu)) {
+			hw_cr4 &= ~X86_CR4_PAE;
+			hw_cr4 |= X86_CR4_PSE;
+		} else if (!(cr4 & X86_CR4_PAE)) {
+			hw_cr4 &= ~X86_CR4_PAE;
+		}
+	}
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
@@ -1787,7 +1844,7 @@
 
 static int vmx_get_cpl(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
+	if (!is_protmode(vcpu))
 		return 0;
 
 	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2042,7 +2099,7 @@
 static bool guest_state_valid(struct kvm_vcpu *vcpu)
 {
 	/* real mode guest state checks */
-	if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
+	if (!is_protmode(vcpu)) {
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
 			return false;
 		if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2175,7 +2232,7 @@
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2188,7 +2245,7 @@
 
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2197,7 +2254,7 @@
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.ept_identity_pagetable)
 		goto out;
 	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2212,7 +2269,7 @@
 	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
 			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2384,14 +2441,12 @@
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
-		u64 data;
 		int j = vmx->nmsrs;
 
 		if (rdmsr_safe(index, &data_low, &data_high) < 0)
 			continue;
 		if (wrmsr_safe(index, data_low, data_high) < 0)
 			continue;
-		data = data_low | ((u64)data_high << 32);
 		vmx->guest_msrs[j].index = i;
 		vmx->guest_msrs[j].data = 0;
 		vmx->guest_msrs[j].mask = -1ull;
@@ -2404,7 +2459,10 @@
 	vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
 
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
-	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
+	if (enable_ept)
+		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
 	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
 	rdtscll(tsc_this);
@@ -2429,10 +2487,10 @@
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 msr;
-	int ret;
+	int ret, idx;
 
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	if (!init_rmode(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
@@ -2526,7 +2584,7 @@
 		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
 	vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
-	vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
+	vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
 	vmx_set_cr4(&vmx->vcpu, 0);
 	vmx_set_efer(&vmx->vcpu, 0);
 	vmx_fpu_activate(&vmx->vcpu);
@@ -2540,7 +2598,7 @@
 	vmx->emulation_required = 0;
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	return ret;
 }
 
@@ -2717,6 +2775,12 @@
 		kvm_queue_exception(vcpu, vec);
 		return 1;
 	case BP_VECTOR:
+		/*
+		 * Update instruction length as we may reinject the exception
+		 * from user space while in guest debugging mode.
+		 */
+		to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			return 0;
 		/* fall through */
@@ -2839,6 +2903,13 @@
 		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
 		/* fall through */
 	case BP_VECTOR:
+		/*
+		 * Update instruction length as we may reinject #BP from
+		 * user space while in guest debugging mode. Reading it for
+		 * #DB as well causes no harm, it is not used in that case.
+		 */
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
 		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
 		kvm_run->debug.arch.exception = ex_no;
@@ -2940,11 +3011,10 @@
 		};
 		break;
 	case 2: /* clts */
-		vmx_fpu_deactivate(vcpu);
-		vcpu->arch.cr0 &= ~X86_CR0_TS;
-		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
-		vmx_fpu_activate(vcpu);
+		vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
+		trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
 		skip_emulated_instruction(vcpu);
+		vmx_fpu_activate(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
 		switch (cr) {
@@ -2962,7 +3032,9 @@
 		}
 		break;
 	case 3: /* lmsw */
-		kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
+		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
+		trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
+		kvm_lmsw(vcpu, val);
 
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -2975,12 +3047,22 @@
 	return 0;
 }
 
+static int check_dr_alias(struct kvm_vcpu *vcpu)
+{
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return -1;
+	}
+	return 0;
+}
+
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
 	unsigned long val;
 	int dr, reg;
 
+	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
 	dr = vmcs_readl(GUEST_DR7);
@@ -3016,14 +3098,20 @@
 		case 0 ... 3:
 			val = vcpu->arch.db[dr];
 			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
 		case 6:
 			val = vcpu->arch.dr6;
 			break;
-		case 7:
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
 			val = vcpu->arch.dr7;
 			break;
-		default:
-			val = 0;
 		}
 		kvm_register_write(vcpu, reg, val);
 	} else {
@@ -3034,21 +3122,25 @@
 			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
 				vcpu->arch.eff_db[dr] = val;
 			break;
-		case 4 ... 5:
-			if (vcpu->arch.cr4 & X86_CR4_DE)
-				kvm_queue_exception(vcpu, UD_VECTOR);
-			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
 		case 6:
 			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
+				kvm_inject_gp(vcpu, 0);
+				return 1;
 			}
 			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
 			break;
-		case 7:
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
 			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
+				kvm_inject_gp(vcpu, 0);
+				return 1;
 			}
 			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
 			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
@@ -3075,6 +3167,7 @@
 	u64 data;
 
 	if (vmx_get_msr(vcpu, ecx, &data)) {
+		trace_kvm_msr_read_ex(ecx);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
@@ -3094,13 +3187,13 @@
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-	trace_kvm_msr_write(ecx, data);
-
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
+		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
 
+	trace_kvm_msr_write(ecx, data);
 	skip_emulated_instruction(vcpu);
 	return 1;
 }
@@ -3385,7 +3478,6 @@
 		}
 
 		if (err != EMULATE_DONE) {
-			kvm_report_emulation_failure(vcpu, "emulation failure");
 			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 			vcpu->run->internal.ndata = 0;
@@ -3416,6 +3508,12 @@
 	return 1;
 }
 
+static int handle_invalid_op(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -3453,6 +3551,8 @@
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
 	[EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
+	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
+	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -3686,9 +3786,6 @@
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
-	if (vcpu->arch.switch_db_regs)
-		set_debugreg(vcpu->arch.dr6, 6);
-
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3789,9 +3886,6 @@
 				  | (1 << VCPU_EXREG_PDPTR));
 	vcpu->arch.regs_dirty = 0;
 
-	if (vcpu->arch.switch_db_regs)
-		get_debugreg(vcpu->arch.dr6, 6);
-
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
@@ -3920,7 +4014,7 @@
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
-	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
 	if (is_mmio)
@@ -3931,37 +4025,88 @@
 		      VMX_EPT_MT_EPTE_SHIFT;
 	else
 		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-			| VMX_EPT_IGMT_BIT;
+			| VMX_EPT_IPAT_BIT;
 
 	return ret;
 }
 
+#define _ER(x) { EXIT_REASON_##x, #x }
+
 static const struct trace_print_flags vmx_exit_reasons_str[] = {
-	{ EXIT_REASON_EXCEPTION_NMI,           "exception" },
-	{ EXIT_REASON_EXTERNAL_INTERRUPT,      "ext_irq" },
-	{ EXIT_REASON_TRIPLE_FAULT,            "triple_fault" },
-	{ EXIT_REASON_NMI_WINDOW,              "nmi_window" },
-	{ EXIT_REASON_IO_INSTRUCTION,          "io_instruction" },
-	{ EXIT_REASON_CR_ACCESS,               "cr_access" },
-	{ EXIT_REASON_DR_ACCESS,               "dr_access" },
-	{ EXIT_REASON_CPUID,                   "cpuid" },
-	{ EXIT_REASON_MSR_READ,                "rdmsr" },
-	{ EXIT_REASON_MSR_WRITE,               "wrmsr" },
-	{ EXIT_REASON_PENDING_INTERRUPT,       "interrupt_window" },
-	{ EXIT_REASON_HLT,                     "halt" },
-	{ EXIT_REASON_INVLPG,                  "invlpg" },
-	{ EXIT_REASON_VMCALL,                  "hypercall" },
-	{ EXIT_REASON_TPR_BELOW_THRESHOLD,     "tpr_below_thres" },
-	{ EXIT_REASON_APIC_ACCESS,             "apic_access" },
-	{ EXIT_REASON_WBINVD,                  "wbinvd" },
-	{ EXIT_REASON_TASK_SWITCH,             "task_switch" },
-	{ EXIT_REASON_EPT_VIOLATION,           "ept_violation" },
+	_ER(EXCEPTION_NMI),
+	_ER(EXTERNAL_INTERRUPT),
+	_ER(TRIPLE_FAULT),
+	_ER(PENDING_INTERRUPT),
+	_ER(NMI_WINDOW),
+	_ER(TASK_SWITCH),
+	_ER(CPUID),
+	_ER(HLT),
+	_ER(INVLPG),
+	_ER(RDPMC),
+	_ER(RDTSC),
+	_ER(VMCALL),
+	_ER(VMCLEAR),
+	_ER(VMLAUNCH),
+	_ER(VMPTRLD),
+	_ER(VMPTRST),
+	_ER(VMREAD),
+	_ER(VMRESUME),
+	_ER(VMWRITE),
+	_ER(VMOFF),
+	_ER(VMON),
+	_ER(CR_ACCESS),
+	_ER(DR_ACCESS),
+	_ER(IO_INSTRUCTION),
+	_ER(MSR_READ),
+	_ER(MSR_WRITE),
+	_ER(MWAIT_INSTRUCTION),
+	_ER(MONITOR_INSTRUCTION),
+	_ER(PAUSE_INSTRUCTION),
+	_ER(MCE_DURING_VMENTRY),
+	_ER(TPR_BELOW_THRESHOLD),
+	_ER(APIC_ACCESS),
+	_ER(EPT_VIOLATION),
+	_ER(EPT_MISCONFIG),
+	_ER(WBINVD),
 	{ -1, NULL }
 };
 
-static bool vmx_gb_page_enable(void)
+#undef _ER
+
+static int vmx_get_lpage_level(void)
 {
-	return false;
+	if (enable_ept && !cpu_has_vmx_ept_1g_page())
+		return PT_DIRECTORY_LEVEL;
+	else
+		/* For shadow and EPT supported 1GB page */
+		return PT_PDPE_LEVEL;
+}
+
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
+static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 exec_control;
+
+	vmx->rdtscp_enabled = false;
+	if (vmx_rdtscp_supported()) {
+		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+		if (exec_control & SECONDARY_EXEC_RDTSCP) {
+			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+			if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
+				vmx->rdtscp_enabled = true;
+			else {
+				exec_control &= ~SECONDARY_EXEC_RDTSCP;
+				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+						exec_control);
+			}
+		}
+	}
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3990,6 +4135,7 @@
 	.set_segment = vmx_set_segment,
 	.get_cpl = vmx_get_cpl,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+	.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
 	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
 	.set_cr3 = vmx_set_cr3,
@@ -4002,6 +4148,8 @@
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.fpu_activate = vmx_fpu_activate,
+	.fpu_deactivate = vmx_fpu_deactivate,
 
 	.tlb_flush = vmx_flush_tlb,
 
@@ -4027,7 +4175,11 @@
 	.get_mt_mask = vmx_get_mt_mask,
 
 	.exit_reasons_str = vmx_exit_reasons_str,
-	.gb_page_enable = vmx_gb_page_enable,
+	.get_lpage_level = vmx_get_lpage_level,
+
+	.cpuid_update = vmx_cpuid_update,
+
+	.rdtscp_supported = vmx_rdtscp_supported,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1e1bc9..e46282a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/cpufreq.h>
 #include <linux/user-return-notifier.h>
+#include <linux/srcu.h>
 #include <trace/events/kvm.h>
 #undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
@@ -93,16 +94,16 @@
 
 struct kvm_shared_msrs_global {
 	int nr;
-	struct kvm_shared_msr {
-		u32 msr;
-		u64 value;
-	} msrs[KVM_NR_SHARED_MSRS];
+	u32 msrs[KVM_NR_SHARED_MSRS];
 };
 
 struct kvm_shared_msrs {
 	struct user_return_notifier urn;
 	bool registered;
-	u64 current_value[KVM_NR_SHARED_MSRS];
+	struct kvm_shared_msr_values {
+		u64 host;
+		u64 curr;
+	} values[KVM_NR_SHARED_MSRS];
 };
 
 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
@@ -147,53 +148,64 @@
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
-	struct kvm_shared_msr *global;
 	struct kvm_shared_msrs *locals
 		= container_of(urn, struct kvm_shared_msrs, urn);
+	struct kvm_shared_msr_values *values;
 
 	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
-		global = &shared_msrs_global.msrs[slot];
-		if (global->value != locals->current_value[slot]) {
-			wrmsrl(global->msr, global->value);
-			locals->current_value[slot] = global->value;
+		values = &locals->values[slot];
+		if (values->host != values->curr) {
+			wrmsrl(shared_msrs_global.msrs[slot], values->host);
+			values->curr = values->host;
 		}
 	}
 	locals->registered = false;
 	user_return_notifier_unregister(urn);
 }
 
-void kvm_define_shared_msr(unsigned slot, u32 msr)
+static void shared_msr_update(unsigned slot, u32 msr)
 {
-	int cpu;
+	struct kvm_shared_msrs *smsr;
 	u64 value;
 
+	smsr = &__get_cpu_var(shared_msrs);
+	/* only read, and nobody should modify it at this time,
+	 * so don't need lock */
+	if (slot >= shared_msrs_global.nr) {
+		printk(KERN_ERR "kvm: invalid MSR slot!");
+		return;
+	}
+	rdmsrl_safe(msr, &value);
+	smsr->values[slot].host = value;
+	smsr->values[slot].curr = value;
+}
+
+void kvm_define_shared_msr(unsigned slot, u32 msr)
+{
 	if (slot >= shared_msrs_global.nr)
 		shared_msrs_global.nr = slot + 1;
-	shared_msrs_global.msrs[slot].msr = msr;
-	rdmsrl_safe(msr, &value);
-	shared_msrs_global.msrs[slot].value = value;
-	for_each_online_cpu(cpu)
-		per_cpu(shared_msrs, cpu).current_value[slot] = value;
+	shared_msrs_global.msrs[slot] = msr;
+	/* we need ensured the shared_msr_global have been updated */
+	smp_wmb();
 }
 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
 
 static void kvm_shared_msr_cpu_online(void)
 {
 	unsigned i;
-	struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
 
 	for (i = 0; i < shared_msrs_global.nr; ++i)
-		locals->current_value[i] = shared_msrs_global.msrs[i].value;
+		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }
 
 void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
 
-	if (((value ^ smsr->current_value[slot]) & mask) == 0)
+	if (((value ^ smsr->values[slot].curr) & mask) == 0)
 		return;
-	smsr->current_value[slot] = value;
-	wrmsrl(shared_msrs_global.msrs[slot].msr, value);
+	smsr->values[slot].curr = value;
+	wrmsrl(shared_msrs_global.msrs[slot], value);
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
@@ -257,12 +269,68 @@
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
+#define EXCPT_BENIGN		0
+#define EXCPT_CONTRIBUTORY	1
+#define EXCPT_PF		2
+
+static int exception_class(int vector)
+{
+	switch (vector) {
+	case PF_VECTOR:
+		return EXCPT_PF;
+	case DE_VECTOR:
+	case TS_VECTOR:
+	case NP_VECTOR:
+	case SS_VECTOR:
+	case GP_VECTOR:
+		return EXCPT_CONTRIBUTORY;
+	default:
+		break;
+	}
+	return EXCPT_BENIGN;
+}
+
+static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
+		unsigned nr, bool has_error, u32 error_code)
+{
+	u32 prev_nr;
+	int class1, class2;
+
+	if (!vcpu->arch.exception.pending) {
+	queue:
+		vcpu->arch.exception.pending = true;
+		vcpu->arch.exception.has_error_code = has_error;
+		vcpu->arch.exception.nr = nr;
+		vcpu->arch.exception.error_code = error_code;
+		return;
+	}
+
+	/* to check exception */
+	prev_nr = vcpu->arch.exception.nr;
+	if (prev_nr == DF_VECTOR) {
+		/* triple fault -> shutdown */
+		set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		return;
+	}
+	class1 = exception_class(prev_nr);
+	class2 = exception_class(nr);
+	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
+		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
+		/* generate double fault per SDM Table 5-5 */
+		vcpu->arch.exception.pending = true;
+		vcpu->arch.exception.has_error_code = true;
+		vcpu->arch.exception.nr = DF_VECTOR;
+		vcpu->arch.exception.error_code = 0;
+	} else
+		/* replace previous exception with a new one in a hope
+		   that instruction re-execution will regenerate lost
+		   exception */
+		goto queue;
+}
+
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
 {
-	WARN_ON(vcpu->arch.exception.pending);
-	vcpu->arch.exception.pending = true;
-	vcpu->arch.exception.has_error_code = false;
-	vcpu->arch.exception.nr = nr;
+	kvm_multiple_exception(vcpu, nr, false, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception);
 
@@ -270,25 +338,6 @@
 			   u32 error_code)
 {
 	++vcpu->stat.pf_guest;
-
-	if (vcpu->arch.exception.pending) {
-		switch(vcpu->arch.exception.nr) {
-		case DF_VECTOR:
-			/* triple fault -> shutdown */
-			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-			return;
-		case PF_VECTOR:
-			vcpu->arch.exception.nr = DF_VECTOR;
-			vcpu->arch.exception.error_code = 0;
-			return;
-		default:
-			/* replace previous exception with a new one in a hope
-			   that instruction re-execution will regenerate lost
-			   exception */
-			vcpu->arch.exception.pending = false;
-			break;
-		}
-	}
 	vcpu->arch.cr2 = addr;
 	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
 }
@@ -301,11 +350,7 @@
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
-	WARN_ON(vcpu->arch.exception.pending);
-	vcpu->arch.exception.pending = true;
-	vcpu->arch.exception.has_error_code = true;
-	vcpu->arch.exception.nr = nr;
-	vcpu->arch.exception.error_code = error_code;
+	kvm_multiple_exception(vcpu, nr, true, error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
 
@@ -383,12 +428,18 @@
 
 void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
-	if (cr0 & CR0_RESERVED_BITS) {
+	cr0 |= X86_CR0_ET;
+
+#ifdef CONFIG_X86_64
+	if (cr0 & 0xffffffff00000000UL) {
 		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-		       cr0, vcpu->arch.cr0);
+		       cr0, kvm_read_cr0(vcpu));
 		kvm_inject_gp(vcpu, 0);
 		return;
 	}
+#endif
+
+	cr0 &= ~CR0_RESERVED_BITS;
 
 	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
 		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
@@ -405,7 +456,7 @@
 
 	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
 #ifdef CONFIG_X86_64
-		if ((vcpu->arch.shadow_efer & EFER_LME)) {
+		if ((vcpu->arch.efer & EFER_LME)) {
 			int cs_db, cs_l;
 
 			if (!is_pae(vcpu)) {
@@ -443,13 +494,13 @@
 
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
-	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+	kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	unsigned long old_cr4 = vcpu->arch.cr4;
+	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
 
 	if (cr4 & CR4_RESERVED_BITS) {
@@ -575,9 +626,11 @@
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	2
+#define KVM_SAVE_MSRS_BEGIN	5
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_APIC_ASSIST_PAGE,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_K6_STAR,
 #ifdef CONFIG_X86_64
@@ -602,7 +655,7 @@
 	}
 
 	if (is_paging(vcpu)
-	    && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
+	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
 		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
 		kvm_inject_gp(vcpu, 0);
 		return;
@@ -633,9 +686,9 @@
 	kvm_x86_ops->set_efer(vcpu, efer);
 
 	efer &= ~EFER_LMA;
-	efer |= vcpu->arch.shadow_efer & EFER_LMA;
+	efer |= vcpu->arch.efer & EFER_LMA;
 
-	vcpu->arch.shadow_efer = efer;
+	vcpu->arch.efer = efer;
 
 	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
 	kvm_mmu_reset_context(vcpu);
@@ -957,6 +1010,100 @@
 	return r;
 }
 
+static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
+{
+	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+}
+
+static bool kvm_hv_msr_partition_wide(u32 msr)
+{
+	bool r = false;
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+		r = true;
+		break;
+	}
+
+	return r;
+}
+
+static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		kvm->arch.hv_guest_os_id = data;
+		/* setting guest os id to zero disables hypercall page */
+		if (!kvm->arch.hv_guest_os_id)
+			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
+		break;
+	case HV_X64_MSR_HYPERCALL: {
+		u64 gfn;
+		unsigned long addr;
+		u8 instructions[4];
+
+		/* if guest os id is not set hypercall should remain disabled */
+		if (!kvm->arch.hv_guest_os_id)
+			break;
+		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
+			kvm->arch.hv_hypercall = data;
+			break;
+		}
+		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
+		addr = gfn_to_hva(kvm, gfn);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		kvm_x86_ops->patch_hypercall(vcpu, instructions);
+		((unsigned char *)instructions)[3] = 0xc3; /* ret */
+		if (copy_to_user((void __user *)addr, instructions, 4))
+			return 1;
+		kvm->arch.hv_hypercall = data;
+		break;
+	}
+	default:
+		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+			  "data 0x%llx\n", msr, data);
+		return 1;
+	}
+	return 0;
+}
+
+static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	switch (msr) {
+	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+		unsigned long addr;
+
+		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+			vcpu->arch.hv_vapic = data;
+			break;
+		}
+		addr = gfn_to_hva(vcpu->kvm, data >>
+				  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+		if (kvm_is_error_hva(addr))
+			return 1;
+		if (clear_user((void __user *)addr, PAGE_SIZE))
+			return 1;
+		vcpu->arch.hv_vapic = data;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
+	default:
+		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+			  "data 0x%llx\n", msr, data);
+		return 1;
+	}
+
+	return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
@@ -1071,6 +1218,16 @@
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
 			"0x%x data 0x%llx\n", msr, data);
 		break;
+	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+		if (kvm_hv_msr_partition_wide(msr)) {
+			int r;
+			mutex_lock(&vcpu->kvm->lock);
+			r = set_msr_hyperv_pw(vcpu, msr, data);
+			mutex_unlock(&vcpu->kvm->lock);
+			return r;
+		} else
+			return set_msr_hyperv(vcpu, msr, data);
+		break;
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
@@ -1170,6 +1327,54 @@
 	return 0;
 }
 
+static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+	struct kvm *kvm = vcpu->kvm;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+		data = kvm->arch.hv_guest_os_id;
+		break;
+	case HV_X64_MSR_HYPERCALL:
+		data = kvm->arch.hv_hypercall;
+		break;
+	default:
+		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+
+	*pdata = data;
+	return 0;
+}
+
+static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 data = 0;
+
+	switch (msr) {
+	case HV_X64_MSR_VP_INDEX: {
+		int r;
+		struct kvm_vcpu *v;
+		kvm_for_each_vcpu(r, v, vcpu->kvm)
+			if (v == vcpu)
+				data = r;
+		break;
+	}
+	case HV_X64_MSR_EOI:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
+	case HV_X64_MSR_ICR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
+	case HV_X64_MSR_TPR:
+		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
+	default:
+		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+		return 1;
+	}
+	*pdata = data;
+	return 0;
+}
+
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
@@ -1221,7 +1426,7 @@
 		data |= (((uint64_t)4ULL) << 40);
 		break;
 	case MSR_EFER:
-		data = vcpu->arch.shadow_efer;
+		data = vcpu->arch.efer;
 		break;
 	case MSR_KVM_WALL_CLOCK:
 		data = vcpu->kvm->arch.wall_clock;
@@ -1236,6 +1441,16 @@
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
 		return get_msr_mce(vcpu, msr, pdata);
+	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
+		if (kvm_hv_msr_partition_wide(msr)) {
+			int r;
+			mutex_lock(&vcpu->kvm->lock);
+			r = get_msr_hyperv_pw(vcpu, msr, pdata);
+			mutex_unlock(&vcpu->kvm->lock);
+			return r;
+		} else
+			return get_msr_hyperv(vcpu, msr, pdata);
+		break;
 	default:
 		if (!ignore_msrs) {
 			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1261,15 +1476,15 @@
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
-	int i;
+	int i, idx;
 
 	vcpu_load(vcpu);
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	for (i = 0; i < msrs->nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 	vcpu_put(vcpu);
 
@@ -1351,6 +1566,11 @@
 	case KVM_CAP_XEN_HVM:
 	case KVM_CAP_ADJUST_CLOCK:
 	case KVM_CAP_VCPU_EVENTS:
+	case KVM_CAP_HYPERV:
+	case KVM_CAP_HYPERV_VAPIC:
+	case KVM_CAP_HYPERV_SPIN:
+	case KVM_CAP_PCI_SEGMENT:
+	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1464,8 +1684,8 @@
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
+	kvm_x86_ops->vcpu_put(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -1530,6 +1750,7 @@
 	cpuid_fix_nx_cap(vcpu);
 	r = 0;
 	kvm_apic_set_version(vcpu);
+	kvm_x86_ops->cpuid_update(vcpu);
 
 out_free:
 	vfree(cpuid_entries);
@@ -1552,6 +1773,7 @@
 		goto out;
 	vcpu->arch.cpuid_nent = cpuid->nent;
 	kvm_apic_set_version(vcpu);
+	kvm_x86_ops->cpuid_update(vcpu);
 	return 0;
 
 out:
@@ -1594,12 +1816,15 @@
 			 u32 index, int *nent, int maxnent)
 {
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-	unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
 #ifdef CONFIG_X86_64
+	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
+				? F(GBPAGES) : 0;
 	unsigned f_lm = F(LM);
 #else
+	unsigned f_gbpages = 0;
 	unsigned f_lm = 0;
 #endif
+	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -1619,7 +1844,7 @@
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(PAT) | F(PSE36) | 0 /* Reserved */ |
 		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-		F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
+		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
 		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
 	/* cpuid 1.ecx */
 	const u32 kvm_supported_word4_x86_features =
@@ -1866,7 +2091,7 @@
 		return 0;
 	if (mce->status & MCI_STATUS_UC) {
 		if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
-		    !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+		    !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
 			printk(KERN_DEBUG "kvm: set_mce: "
 			       "injects mce exception while "
 			       "previous one is in progress!\n");
@@ -2160,14 +2385,14 @@
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	spin_lock(&kvm->mmu_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
 	spin_unlock(&kvm->mmu_lock);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
@@ -2176,13 +2401,35 @@
 	return kvm->arch.n_alloc_mmu_pages;
 }
 
+gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_mem_alias *alias;
+	struct kvm_mem_aliases *aliases;
+
+	aliases = rcu_dereference(kvm->arch.aliases);
+
+	for (i = 0; i < aliases->naliases; ++i) {
+		alias = &aliases->aliases[i];
+		if (alias->flags & KVM_ALIAS_INVALID)
+			continue;
+		if (gfn >= alias->base_gfn
+		    && gfn < alias->base_gfn + alias->npages)
+			return alias->target_gfn + gfn - alias->base_gfn;
+	}
+	return gfn;
+}
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
 	struct kvm_mem_alias *alias;
+	struct kvm_mem_aliases *aliases;
 
-	for (i = 0; i < kvm->arch.naliases; ++i) {
-		alias = &kvm->arch.aliases[i];
+	aliases = rcu_dereference(kvm->arch.aliases);
+
+	for (i = 0; i < aliases->naliases; ++i) {
+		alias = &aliases->aliases[i];
 		if (gfn >= alias->base_gfn
 		    && gfn < alias->base_gfn + alias->npages)
 			return alias->target_gfn + gfn - alias->base_gfn;
@@ -2200,6 +2447,7 @@
 {
 	int r, n;
 	struct kvm_mem_alias *p;
+	struct kvm_mem_aliases *aliases, *old_aliases;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -2216,26 +2464,48 @@
 	    < alias->target_phys_addr)
 		goto out;
 
-	down_write(&kvm->slots_lock);
-	spin_lock(&kvm->mmu_lock);
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out;
 
-	p = &kvm->arch.aliases[alias->slot];
+	mutex_lock(&kvm->slots_lock);
+
+	/* invalidate any gfn reference in case of deletion/shrinking */
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
+	aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kvm_mmu_zap_all(kvm);
+	kfree(old_aliases);
+
+	r = -ENOMEM;
+	aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!aliases)
+		goto out_unlock;
+
+	memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
+
+	p = &aliases->aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
 	p->npages = alias->memory_size >> PAGE_SHIFT;
 	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
+	p->flags &= ~(KVM_ALIAS_INVALID);
 
 	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-		if (kvm->arch.aliases[n - 1].npages)
+		if (aliases->aliases[n - 1].npages)
 			break;
-	kvm->arch.naliases = n;
+	aliases->naliases = n;
 
-	spin_unlock(&kvm->mmu_lock);
-	kvm_mmu_zap_all(kvm);
+	old_aliases = kvm->arch.aliases;
+	rcu_assign_pointer(kvm->arch.aliases, aliases);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(old_aliases);
+	r = 0;
 
-	up_write(&kvm->slots_lock);
-
-	return 0;
-
+out_unlock:
+	mutex_unlock(&kvm->slots_lock);
 out:
 	return r;
 }
@@ -2273,18 +2543,18 @@
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_PIC_MASTER:
-		spin_lock(&pic_irqchip(kvm)->lock);
+		raw_spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[0],
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic_irqchip(kvm)->lock);
+		raw_spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 	case KVM_IRQCHIP_PIC_SLAVE:
-		spin_lock(&pic_irqchip(kvm)->lock);
+		raw_spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[1],
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic_irqchip(kvm)->lock);
+		raw_spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 	case KVM_IRQCHIP_IOAPIC:
 		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -2364,29 +2634,62 @@
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				      struct kvm_dirty_log *log)
 {
-	int r;
-	int n;
+	int r, n, i;
 	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
+	unsigned long is_dirty = 0;
+	unsigned long *dirty_bitmap = NULL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
+	memslot = &kvm->memslots->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+
+	r = -ENOMEM;
+	dirty_bitmap = vmalloc(n);
+	if (!dirty_bitmap)
+		goto out;
+	memset(dirty_bitmap, 0, n);
+
+	for (i = 0; !is_dirty && i < n/sizeof(long); i++)
+		is_dirty = memslot->dirty_bitmap[i];
+
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
+		struct kvm_memslots *slots, *old_slots;
+
 		spin_lock(&kvm->mmu_lock);
 		kvm_mmu_slot_remove_write_access(kvm, log->slot);
 		spin_unlock(&kvm->mmu_lock);
-		memslot = &kvm->memslots[log->slot];
-		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
-		memset(memslot->dirty_bitmap, 0, n);
+
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
+
+		old_slots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
+		kfree(old_slots);
 	}
+
 	r = 0;
+	if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
+		r = -EFAULT;
+out_free:
+	vfree(dirty_bitmap);
 out:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 
@@ -2469,6 +2772,8 @@
 		if (vpic) {
 			r = kvm_ioapic_init(kvm);
 			if (r) {
+				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+							  &vpic->dev);
 				kfree(vpic);
 				goto create_irqchip_unlock;
 			}
@@ -2480,10 +2785,8 @@
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
 			mutex_lock(&kvm->irq_lock);
-			kfree(kvm->arch.vpic);
-			kfree(kvm->arch.vioapic);
-			kvm->arch.vpic = NULL;
-			kvm->arch.vioapic = NULL;
+			kvm_ioapic_destroy(kvm);
+			kvm_destroy_pic(kvm);
 			mutex_unlock(&kvm->irq_lock);
 		}
 	create_irqchip_unlock:
@@ -2499,7 +2802,7 @@
 				   sizeof(struct kvm_pit_config)))
 			goto out;
 	create_pit:
-		down_write(&kvm->slots_lock);
+		mutex_lock(&kvm->slots_lock);
 		r = -EEXIST;
 		if (kvm->arch.vpit)
 			goto create_pit_unlock;
@@ -2508,7 +2811,7 @@
 		if (kvm->arch.vpit)
 			r = 0;
 	create_pit_unlock:
-		up_write(&kvm->slots_lock);
+		mutex_unlock(&kvm->slots_lock);
 		break;
 	case KVM_IRQ_LINE_STATUS:
 	case KVM_IRQ_LINE: {
@@ -2725,7 +3028,7 @@
 	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2734,17 +3037,44 @@
 	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
 		return 0;
 
-	return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
+	return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
 }
 
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
-			       struct kvm_vcpu *vcpu)
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	access |= PFERR_FETCH_MASK;
+	return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	access |= PFERR_WRITE_MASK;
+	return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+/* uses this to access any guest's mapped memory without checking CPL */
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+	return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
+}
+
+static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+				      struct kvm_vcpu *vcpu, u32 access,
+				      u32 *error)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
 	while (bytes) {
-		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
@@ -2767,14 +3097,37 @@
 	return r;
 }
 
+/* used for instruction fetching */
+static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
+				struct kvm_vcpu *vcpu, u32 *error)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
+					  access | PFERR_FETCH_MASK, error);
+}
+
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+			       struct kvm_vcpu *vcpu, u32 *error)
+{
+	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
+					  error);
+}
+
+static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
+			       struct kvm_vcpu *vcpu, u32 *error)
+{
+	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
+}
+
 static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
-				struct kvm_vcpu *vcpu)
+				struct kvm_vcpu *vcpu, u32 *error)
 {
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
 	while (bytes) {
-		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+		gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
 		unsigned offset = addr & (PAGE_SIZE-1);
 		unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
 		int ret;
@@ -2804,6 +3157,7 @@
 				  struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	u32 error_code;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -2813,17 +3167,20 @@
 		return X86EMUL_CONTINUE;
 	}
 
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+	gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
+
+	if (gpa == UNMAPPED_GVA) {
+		kvm_inject_page_fault(vcpu, addr, error_code);
+		return X86EMUL_PROPAGATE_FAULT;
+	}
 
 	/* For APIC access vmexit */
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 		goto mmio;
 
-	if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+	if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
 				== X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
-	if (gpa == UNMAPPED_GVA)
-		return X86EMUL_PROPAGATE_FAULT;
 
 mmio:
 	/*
@@ -2862,11 +3219,12 @@
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	u32 error_code;
 
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
 
 	if (gpa == UNMAPPED_GVA) {
-		kvm_inject_page_fault(vcpu, addr, 2);
+		kvm_inject_page_fault(vcpu, addr, error_code);
 		return X86EMUL_PROPAGATE_FAULT;
 	}
 
@@ -2930,7 +3288,7 @@
 		char *kaddr;
 		u64 val;
 
-		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
 
 		if (gpa == UNMAPPED_GVA ||
 		   (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2967,35 +3325,21 @@
 
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
-	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
+	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
+	kvm_x86_ops->fpu_activate(vcpu);
 	return X86EMUL_CONTINUE;
 }
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
-
-	switch (dr) {
-	case 0 ... 3:
-		*dest = kvm_x86_ops->get_dr(vcpu, dr);
-		return X86EMUL_CONTINUE;
-	default:
-		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
-		return X86EMUL_UNHANDLEABLE;
-	}
+	return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
 {
 	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
-	int exception;
 
-	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
-	if (exception) {
-		/* FIXME: better handling */
-		return X86EMUL_UNHANDLEABLE;
-	}
-	return X86EMUL_CONTINUE;
+	return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
 }
 
 void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3009,7 +3353,7 @@
 
 	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
 
-	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
+	kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
 
 	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
 	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -3017,7 +3361,8 @@
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
 static struct x86_emulate_ops emulate_ops = {
-	.read_std            = kvm_read_guest_virt,
+	.read_std            = kvm_read_guest_virt_system,
+	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
 	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
@@ -3060,8 +3405,9 @@
 		vcpu->arch.emulate_ctxt.vcpu = vcpu;
 		vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
 		vcpu->arch.emulate_ctxt.mode =
+			(!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
 			(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
-			? X86EMUL_MODE_REAL : cs_l
+			? X86EMUL_MODE_VM86 : cs_l
 			? X86EMUL_MODE_PROT64 :	cs_db
 			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 
@@ -3153,12 +3499,17 @@
 	gva_t q = vcpu->arch.pio.guest_gva;
 	unsigned bytes;
 	int ret;
+	u32 error_code;
 
 	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
 	if (vcpu->arch.pio.in)
-		ret = kvm_write_guest_virt(q, p, bytes, vcpu);
+		ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
 	else
-		ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+		ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
+
+	if (ret == X86EMUL_PROPAGATE_FAULT)
+		kvm_inject_page_fault(vcpu, q, error_code);
+
 	return ret;
 }
 
@@ -3179,7 +3530,7 @@
 		if (io->in) {
 			r = pio_copy_data(vcpu);
 			if (r)
-				return r;
+				goto out;
 		}
 
 		delta = 1;
@@ -3206,7 +3557,7 @@
 			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
 		}
 	}
-
+out:
 	io->count -= io->cur_count;
 	io->cur_count = 0;
 
@@ -3219,11 +3570,12 @@
 	int r;
 
 	if (vcpu->arch.pio.in)
-		r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
 				    vcpu->arch.pio.size, pd);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
-				     vcpu->arch.pio.size, pd);
+		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+				     vcpu->arch.pio.port, vcpu->arch.pio.size,
+				     pd);
 	return r;
 }
 
@@ -3234,7 +3586,7 @@
 	int i, r = 0;
 
 	for (i = 0; i < io->cur_count; i++) {
-		if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+		if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
 				     io->port, io->size, pd)) {
 			r = -EOPNOTSUPP;
 			break;
@@ -3248,6 +3600,8 @@
 {
 	unsigned long val;
 
+	trace_kvm_pio(!in, port, size, 1);
+
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 	vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3259,11 +3613,10 @@
 	vcpu->arch.pio.down = 0;
 	vcpu->arch.pio.rep = 0;
 
-	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
-		      size, 1);
-
-	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	memcpy(vcpu->arch.pio_data, &val, 4);
+	if (!vcpu->arch.pio.in) {
+		val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+		memcpy(vcpu->arch.pio_data, &val, 4);
+	}
 
 	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
 		complete_pio(vcpu);
@@ -3280,6 +3633,8 @@
 	unsigned now, in_page;
 	int ret = 0;
 
+	trace_kvm_pio(!in, port, size, count);
+
 	vcpu->run->exit_reason = KVM_EXIT_IO;
 	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
 	vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3291,9 +3646,6 @@
 	vcpu->arch.pio.down = down;
 	vcpu->arch.pio.rep = rep;
 
-	trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
-		      size, count);
-
 	if (!count) {
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 		return 1;
@@ -3325,10 +3677,8 @@
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
-		if (ret == X86EMUL_PROPAGATE_FAULT) {
-			kvm_inject_gp(vcpu, 0);
+		if (ret == X86EMUL_PROPAGATE_FAULT)
 			return 1;
-		}
 		if (ret == 0 && !pio_string_write(vcpu)) {
 			complete_pio(vcpu);
 			if (vcpu->arch.pio.count == 0)
@@ -3487,11 +3837,76 @@
 		return a0 | ((gpa_t)a1 << 32);
 }
 
+int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
+{
+	u64 param, ingpa, outgpa, ret;
+	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	bool fast, longmode;
+	int cs_db, cs_l;
+
+	/*
+	 * hypercall generates UD from non zero cpl and real mode
+	 * per HYPER-V spec
+	 */
+	if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 0;
+	}
+
+	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+	longmode = is_long_mode(vcpu) && cs_l == 1;
+
+	if (!longmode) {
+		param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
+		ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
+		outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
+			(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+	}
+#ifdef CONFIG_X86_64
+	else {
+		param = kvm_register_read(vcpu, VCPU_REGS_RCX);
+		ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
+		outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+	}
+#endif
+
+	code = param & 0xffff;
+	fast = (param >> 16) & 0x1;
+	rep_cnt = (param >> 32) & 0xfff;
+	rep_idx = (param >> 48) & 0xfff;
+
+	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+
+	switch (code) {
+	case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
+		kvm_vcpu_on_spin(vcpu);
+		break;
+	default:
+		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		break;
+	}
+
+	ret = res | (((u64)rep_done & 0xfff) << 32);
+	if (longmode) {
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	} else {
+		kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
+		kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
+	}
+
+	return 1;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int r = 1;
 
+	if (kvm_hv_hypercall_enabled(vcpu->kvm))
+		return kvm_hv_hypercall(vcpu);
+
 	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
 	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
@@ -3534,10 +3949,8 @@
 int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
 {
 	char instruction[3];
-	int ret = 0;
 	unsigned long rip = kvm_rip_read(vcpu);
 
-
 	/*
 	 * Blow out the MMU to ensure that no other VCPU has an active mapping
 	 * to ensure that the updated hypercall appears atomically across all
@@ -3546,11 +3959,8 @@
 	kvm_mmu_zap_all(vcpu->kvm);
 
 	kvm_x86_ops->patch_hypercall(vcpu, instruction);
-	if (emulator_write_emulated(rip, instruction, 3, vcpu)
-	    != X86EMUL_CONTINUE)
-		ret = -EFAULT;
 
-	return ret;
+	return emulator_write_emulated(rip, instruction, 3, vcpu);
 }
 
 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3583,10 +3993,9 @@
 {
 	unsigned long value;
 
-	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
 	switch (cr) {
 	case 0:
-		value = vcpu->arch.cr0;
+		value = kvm_read_cr0(vcpu);
 		break;
 	case 2:
 		value = vcpu->arch.cr2;
@@ -3595,7 +4004,7 @@
 		value = vcpu->arch.cr3;
 		break;
 	case 4:
-		value = vcpu->arch.cr4;
+		value = kvm_read_cr4(vcpu);
 		break;
 	case 8:
 		value = kvm_get_cr8(vcpu);
@@ -3613,7 +4022,7 @@
 {
 	switch (cr) {
 	case 0:
-		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
+		kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
 		*rflags = kvm_get_rflags(vcpu);
 		break;
 	case 2:
@@ -3623,7 +4032,7 @@
 		kvm_set_cr3(vcpu, val);
 		break;
 	case 4:
-		kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+		kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
 		break;
 	case 8:
 		kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3690,6 +4099,7 @@
 	}
 	return best;
 }
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 {
@@ -3773,14 +4183,15 @@
 static void vapic_exit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	int idx;
 
 	if (!apic || !apic->vapic_addr)
 		return;
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_release_page_dirty(apic->vapic_page);
 	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3876,12 +4287,17 @@
 			r = 0;
 			goto out;
 		}
+		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+			vcpu->fpu_active = 0;
+			kvm_x86_ops->fpu_deactivate(vcpu);
+		}
 	}
 
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
-	kvm_load_guest_fpu(vcpu);
+	if (vcpu->fpu_active)
+		kvm_load_guest_fpu(vcpu);
 
 	local_irq_disable();
 
@@ -3909,7 +4325,7 @@
 		kvm_lapic_sync_to_vapic(vcpu);
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	kvm_guest_enter();
 
@@ -3951,7 +4367,7 @@
 
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	/*
 	 * Profile KVM exit RIPs:
@@ -3973,6 +4389,7 @@
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int r;
+	struct kvm *kvm = vcpu->kvm;
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
 		pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3984,7 +4401,7 @@
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-	down_read(&vcpu->kvm->slots_lock);
+	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 	vapic_enter(vcpu);
 
 	r = 1;
@@ -3992,9 +4409,9 @@
 		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 			r = vcpu_enter_guest(vcpu);
 		else {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_vcpu_block(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
 			{
 				switch(vcpu->arch.mp_state) {
@@ -4029,13 +4446,13 @@
 			++vcpu->stat.signal_exits;
 		}
 		if (need_resched()) {
-			up_read(&vcpu->kvm->slots_lock);
+			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			kvm_resched(vcpu);
-			down_read(&vcpu->kvm->slots_lock);
+			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 	post_kvm_run_save(vcpu);
 
 	vapic_exit(vcpu);
@@ -4074,10 +4491,10 @@
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
 
-		down_read(&vcpu->kvm->slots_lock);
+		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
 					EMULTYPE_NO_DECODE);
-		up_read(&vcpu->kvm->slots_lock);
+		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 		if (r == EMULATE_DO_MMIO) {
 			/*
 			 * Read-modify-write.  Back to userspace.
@@ -4204,13 +4621,12 @@
 	sregs->gdt.limit = dt.limit;
 	sregs->gdt.base = dt.base;
 
-	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-	sregs->cr0 = vcpu->arch.cr0;
+	sregs->cr0 = kvm_read_cr0(vcpu);
 	sregs->cr2 = vcpu->arch.cr2;
 	sregs->cr3 = vcpu->arch.cr3;
-	sregs->cr4 = vcpu->arch.cr4;
+	sregs->cr4 = kvm_read_cr4(vcpu);
 	sregs->cr8 = kvm_get_cr8(vcpu);
-	sregs->efer = vcpu->arch.shadow_efer;
+	sregs->efer = vcpu->arch.efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
 	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4298,14 +4714,23 @@
 {
 	struct descriptor_table dtable;
 	u16 index = selector >> 3;
+	int ret;
+	u32 err;
+	gva_t addr;
 
 	get_segment_descriptor_dtable(vcpu, selector, &dtable);
 
 	if (dtable.limit < index * 8 + 7) {
 		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
-		return 1;
+		return X86EMUL_PROPAGATE_FAULT;
 	}
-	return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+	addr = dtable.base + index * 8;
+	ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
+					 vcpu,  &err);
+	if (ret == X86EMUL_PROPAGATE_FAULT)
+		kvm_inject_page_fault(vcpu, addr, err);
+
+       return ret;
 }
 
 /* allowed just for 8 bytes segments */
@@ -4319,15 +4744,23 @@
 
 	if (dtable.limit < index * 8 + 7)
 		return 1;
-	return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+	return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
 }
 
-static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
+			       struct desc_struct *seg_desc)
+{
+	u32 base_addr = get_desc_base(seg_desc);
+
+	return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
+}
+
+static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
 			     struct desc_struct *seg_desc)
 {
 	u32 base_addr = get_desc_base(seg_desc);
 
-	return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
+	return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
 }
 
 static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4338,18 +4771,6 @@
 	return kvm_seg.selector;
 }
 
-static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
-						u16 selector,
-						struct kvm_segment *kvm_seg)
-{
-	struct desc_struct seg_desc;
-
-	if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
-		return 1;
-	seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
-	return 0;
-}
-
 static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
 {
 	struct kvm_segment segvar = {
@@ -4367,7 +4788,7 @@
 		.unusable = 0,
 	};
 	kvm_x86_ops->set_segment(vcpu, &segvar, seg);
-	return 0;
+	return X86EMUL_CONTINUE;
 }
 
 static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
@@ -4377,24 +4798,112 @@
 		(kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
 }
 
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
-				int type_bits, int seg)
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
 {
 	struct kvm_segment kvm_seg;
+	struct desc_struct seg_desc;
+	u8 dpl, rpl, cpl;
+	unsigned err_vec = GP_VECTOR;
+	u32 err_code = 0;
+	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
+	int ret;
 
-	if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
+	if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
 		return kvm_load_realmode_segment(vcpu, selector, seg);
-	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
-		return 1;
-	kvm_seg.type |= type_bits;
 
-	if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
-	    seg != VCPU_SREG_LDTR)
-		if (!kvm_seg.s)
-			kvm_seg.unusable = 1;
+	/* NULL selector is not valid for TR, CS and SS */
+	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
+	    && null_selector)
+		goto exception;
 
+	/* TR should be in GDT only */
+	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
+		goto exception;
+
+	ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
+	if (ret)
+		return ret;
+
+	seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
+
+	if (null_selector) { /* for NULL selector skip all following checks */
+		kvm_seg.unusable = 1;
+		goto load;
+	}
+
+	err_code = selector & 0xfffc;
+	err_vec = GP_VECTOR;
+
+	/* can't load system descriptor into segment selecor */
+	if (seg <= VCPU_SREG_GS && !kvm_seg.s)
+		goto exception;
+
+	if (!kvm_seg.present) {
+		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
+		goto exception;
+	}
+
+	rpl = selector & 3;
+	dpl = kvm_seg.dpl;
+	cpl = kvm_x86_ops->get_cpl(vcpu);
+
+	switch (seg) {
+	case VCPU_SREG_SS:
+		/*
+		 * segment is not a writable data segment or segment
+		 * selector's RPL != CPL or segment selector's RPL != CPL
+		 */
+		if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
+			goto exception;
+		break;
+	case VCPU_SREG_CS:
+		if (!(kvm_seg.type & 8))
+			goto exception;
+
+		if (kvm_seg.type & 4) {
+			/* conforming */
+			if (dpl > cpl)
+				goto exception;
+		} else {
+			/* nonconforming */
+			if (rpl > cpl || dpl != cpl)
+				goto exception;
+		}
+		/* CS(RPL) <- CPL */
+		selector = (selector & 0xfffc) | cpl;
+            break;
+	case VCPU_SREG_TR:
+		if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
+			goto exception;
+		break;
+	case VCPU_SREG_LDTR:
+		if (kvm_seg.s || kvm_seg.type != 2)
+			goto exception;
+		break;
+	default: /*  DS, ES, FS, or GS */
+		/*
+		 * segment is not a data or readable code segment or
+		 * ((segment is a data or nonconforming code segment)
+		 * and (both RPL and CPL > DPL))
+		 */
+		if ((kvm_seg.type & 0xa) == 0x8 ||
+		    (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
+			goto exception;
+		break;
+	}
+
+	if (!kvm_seg.unusable && kvm_seg.s) {
+		/* mark segment as accessed */
+		kvm_seg.type |= 1;
+		seg_desc.type |= 1;
+		save_guest_segment_descriptor(vcpu, selector, &seg_desc);
+	}
+load:
 	kvm_set_segment(vcpu, &kvm_seg, seg);
-	return 0;
+	return X86EMUL_CONTINUE;
+exception:
+	kvm_queue_exception_e(vcpu, err_vec, err_code);
+	return X86EMUL_PROPAGATE_FAULT;
 }
 
 static void save_state_to_tss32(struct kvm_vcpu *vcpu,
@@ -4420,6 +4929,14 @@
 	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
 }
 
+static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
+{
+	struct kvm_segment kvm_seg;
+	kvm_get_segment(vcpu, &kvm_seg, seg);
+	kvm_seg.selector = sel;
+	kvm_set_segment(vcpu, &kvm_seg, seg);
+}
+
 static int load_state_from_tss32(struct kvm_vcpu *vcpu,
 				  struct tss_segment_32 *tss)
 {
@@ -4437,25 +4954,41 @@
 	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
 	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+	/*
+	 * SDM says that segment selectors are loaded before segment
+	 * descriptors
+	 */
+	kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
+	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+	kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
+	kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
+
+	/*
+	 * Now load segment descriptors. If fault happenes at this stage
+	 * it is handled in a context of new task
+	 */
+	if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+	if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+	if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+	if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+	if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
 		return 1;
 	return 0;
 }
@@ -4495,19 +5028,33 @@
 	kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
 	kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+	/*
+	 * SDM says that segment selectors are loaded before segment
+	 * descriptors
+	 */
+	kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
+	kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
+	kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
+	kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
+	kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
+
+	/*
+	 * Now load segment descriptors. If fault happenes at this stage
+	 * it is handled in a context of new task
+	 */
+	if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+	if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+	if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
 		return 1;
 
-	if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+	if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
 		return 1;
 	return 0;
 }
@@ -4529,7 +5076,7 @@
 			    sizeof tss_segment_16))
 		goto out;
 
-	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
 			   &tss_segment_16, sizeof tss_segment_16))
 		goto out;
 
@@ -4537,7 +5084,7 @@
 		tss_segment_16.prev_task_link = old_tss_sel;
 
 		if (kvm_write_guest(vcpu->kvm,
-				    get_tss_base_addr(vcpu, nseg_desc),
+				    get_tss_base_addr_write(vcpu, nseg_desc),
 				    &tss_segment_16.prev_task_link,
 				    sizeof tss_segment_16.prev_task_link))
 			goto out;
@@ -4568,7 +5115,7 @@
 			    sizeof tss_segment_32))
 		goto out;
 
-	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+	if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
 			   &tss_segment_32, sizeof tss_segment_32))
 		goto out;
 
@@ -4576,7 +5123,7 @@
 		tss_segment_32.prev_task_link = old_tss_sel;
 
 		if (kvm_write_guest(vcpu->kvm,
-				    get_tss_base_addr(vcpu, nseg_desc),
+				    get_tss_base_addr_write(vcpu, nseg_desc),
 				    &tss_segment_32.prev_task_link,
 				    sizeof tss_segment_32.prev_task_link))
 			goto out;
@@ -4599,7 +5146,7 @@
 	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
 	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
 
-	old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
+	old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
 
 	/* FIXME: Handle errors. Failure to read either TSS or their
 	 * descriptors should generate a pagefault.
@@ -4658,7 +5205,7 @@
 					      &nseg_desc);
 	}
 
-	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
+	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
 	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
 	tr_seg.type = 11;
 	kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
@@ -4689,17 +5236,15 @@
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
-	mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
+	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
 	kvm_x86_ops->set_efer(vcpu, sregs->efer);
 	kvm_set_apic_base(vcpu, sregs->apic_base);
 
-	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-
-	mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
+	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
 	vcpu->arch.cr0 = sregs->cr0;
 
-	mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
+	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
 	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
 		load_pdptrs(vcpu, vcpu->arch.cr3);
@@ -4734,7 +5279,7 @@
 	/* Older userspace won't unhalt the vcpu on reset. */
 	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
 	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
-	    !(vcpu->arch.cr0 & X86_CR0_PE))
+	    !is_protmode(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
 	vcpu_put(vcpu);
@@ -4832,11 +5377,12 @@
 {
 	unsigned long vaddr = tr->linear_address;
 	gpa_t gpa;
+	int idx;
 
 	vcpu_load(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
-	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -4917,14 +5463,14 @@
 
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
+	if (vcpu->guest_fpu_loaded)
 		return;
 
 	vcpu->guest_fpu_loaded = 1;
 	kvm_fx_save(&vcpu->arch.host_fx_image);
 	kvm_fx_restore(&vcpu->arch.guest_fx_image);
+	trace_kvm_fpu(1);
 }
-EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
@@ -4935,8 +5481,9 @@
 	kvm_fx_save(&vcpu->arch.guest_fx_image);
 	kvm_fx_restore(&vcpu->arch.host_fx_image);
 	++vcpu->stat.fpu_reload;
+	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
+	trace_kvm_fpu(0);
 }
-EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
@@ -5088,11 +5635,13 @@
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+	int idx;
+
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_mmu_destroy(vcpu);
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
@@ -5103,6 +5652,12 @@
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
+	kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
+	if (!kvm->arch.aliases) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
@@ -5159,16 +5714,18 @@
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	cleanup_srcu_struct(&kvm->srcu);
+	kfree(kvm->arch.aliases);
 	kfree(kvm);
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
 				struct kvm_memory_slot old,
+				struct kvm_userspace_memory_region *mem,
 				int user_alloc)
 {
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	int npages = memslot->npages;
 
 	/*To keep backward compatibility with older userspace,
 	 *x86 needs to hanlde !user_alloc case.
@@ -5188,26 +5745,35 @@
 			if (IS_ERR((void *)userspace_addr))
 				return PTR_ERR((void *)userspace_addr);
 
-			/* set userspace_addr atomically for kvm_hva_to_rmapp */
-			spin_lock(&kvm->mmu_lock);
 			memslot->userspace_addr = userspace_addr;
-			spin_unlock(&kvm->mmu_lock);
-		} else {
-			if (!old.user_alloc && old.rmap) {
-				int ret;
-
-				down_write(&current->mm->mmap_sem);
-				ret = do_munmap(current->mm, old.userspace_addr,
-						old.npages * PAGE_SIZE);
-				up_write(&current->mm->mmap_sem);
-				if (ret < 0)
-					printk(KERN_WARNING
-				       "kvm_vm_ioctl_set_memory_region: "
-				       "failed to munmap memory\n");
-			}
 		}
 	}
 
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+
+	int npages = mem->memory_size >> PAGE_SHIFT;
+
+	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+		int ret;
+
+		down_write(&current->mm->mmap_sem);
+		ret = do_munmap(current->mm, old.userspace_addr,
+				old.npages * PAGE_SIZE);
+		up_write(&current->mm->mmap_sem);
+		if (ret < 0)
+			printk(KERN_WARNING
+			       "kvm_vm_ioctl_set_memory_region: "
+			       "failed to munmap memory\n");
+	}
+
 	spin_lock(&kvm->mmu_lock);
 	if (!kvm->arch.n_requested_mmu_pages) {
 		unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -5216,8 +5782,6 @@
 
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
-
-	return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea5..2d10163 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 {
@@ -35,4 +36,33 @@
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
                                              u32 function, u32 index);
 
+static inline bool is_protmode(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
+}
+
+static inline int is_long_mode(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+	return vcpu->arch.efer & EFER_LMA;
+#else
+	return 0;
+#endif
+}
+
+static inline int is_pae(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
+}
+
+static inline int is_pse(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
+}
+
+static inline int is_paging(struct kvm_vcpu *vcpu)
+{
+	return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
+}
+
 #endif
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 33a4ff4..b8c59b8 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -78,7 +78,6 @@
 	walk->data -= walk->offset;
 
 	if (nbytes && walk->offset & alignmask && !err) {
-		walk->offset += alignmask - 1;
 		walk->offset = ALIGN(walk->offset, alignmask + 1);
 		walk->data += walk->offset;
 
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 1887090..2bb7348 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -386,11 +386,13 @@
 {
 	struct crypto_aead *authenc = crypto_aead_reqtfm(req);
 	struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
-	struct ablkcipher_request *abreq = aead_request_ctx(req);
+	struct authenc_request_ctx *areq_ctx = aead_request_ctx(req);
 	struct crypto_ablkcipher *enc = ctx->enc;
 	struct scatterlist *dst = req->dst;
 	unsigned int cryptlen = req->cryptlen;
-	u8 *iv = (u8 *)(abreq + 1) + crypto_ablkcipher_reqsize(enc);
+	struct ablkcipher_request *abreq = (void *)(areq_ctx->tail
+						    + ctx->reqoff);
+	u8 *iv = (u8 *)abreq - crypto_ablkcipher_ivsize(enc);
 	int err;
 
 	ablkcipher_request_set_tfm(abreq, enc);
@@ -454,7 +456,7 @@
 	unsigned int authsize;
 
 	areq_ctx->complete = authenc_verify_ahash_done;
-	areq_ctx->complete = authenc_verify_ahash_update_done;
+	areq_ctx->update_complete = authenc_verify_ahash_update_done;
 
 	ohash = authenc_ahash_fn(req, CRYPTO_TFM_REQ_MAY_SLEEP);
 	if (IS_ERR(ohash))
@@ -546,10 +548,6 @@
 	if (IS_ERR(auth))
 		return PTR_ERR(auth);
 
-	ctx->reqoff = ALIGN(2 * crypto_ahash_digestsize(auth) +
-			    crypto_ahash_alignmask(auth),
-			    crypto_ahash_alignmask(auth) + 1);
-
 	enc = crypto_spawn_skcipher(&ictx->enc);
 	err = PTR_ERR(enc);
 	if (IS_ERR(enc))
@@ -558,13 +556,18 @@
 	ctx->auth = auth;
 	ctx->enc = enc;
 
-	tfm->crt_aead.reqsize = max_t(unsigned int,
-				crypto_ahash_reqsize(auth) + ctx->reqoff +
-				sizeof(struct authenc_request_ctx) +
+	ctx->reqoff = ALIGN(2 * crypto_ahash_digestsize(auth) +
+			    crypto_ahash_alignmask(auth),
+			    crypto_ahash_alignmask(auth) + 1) +
+		      crypto_ablkcipher_ivsize(enc);
+
+	tfm->crt_aead.reqsize = sizeof(struct authenc_request_ctx) +
+				ctx->reqoff +
+				max_t(unsigned int,
+				crypto_ahash_reqsize(auth) +
 				sizeof(struct ahash_request),
 				sizeof(struct skcipher_givcrypt_request) +
-				crypto_ablkcipher_reqsize(enc) +
-				crypto_ablkcipher_ivsize(enc));
+				crypto_ablkcipher_reqsize(enc));
 
 	return 0;
 
diff --git a/crypto/md5.c b/crypto/md5.c
index 9fda213..30efc7d 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -234,6 +234,7 @@
 	.export		=	md5_export,
 	.import		=	md5_import,
 	.descsize	=	sizeof(struct md5_state),
+	.statesize	=	sizeof(struct md5_state),
 	.base		=	{
 		.cra_name	=	"md5",
 		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 8a713f1..919a285 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -11,6 +11,9 @@
 #include <asm/smp.h>
 #include "agp.h"
 
+int intel_agp_enabled;
+EXPORT_SYMBOL(intel_agp_enabled);
+
 /*
  * If we have Intel graphics, we're not going to have anything other than
  * an Intel IOMMU. So make the correct use of the PCI DMA API contingent
@@ -65,6 +68,10 @@
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB	    0x0062
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB    0x006a
 #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG	    0x0046
+#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB  0x0100
+#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_IG  0x0102
+#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB  0x0104
+#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_IG  0x0106
 
 /* cover 915 and 945 variants */
 #define IS_I915 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || \
@@ -99,7 +106,9 @@
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB || \
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB || \
 		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB || \
-		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB)
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB || \
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || \
+		agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB)
 
 extern int agp_memory_reserved;
 
@@ -148,6 +157,25 @@
 #define INTEL_I7505_AGPCTRL	0x70
 #define INTEL_I7505_MCHCFG	0x50
 
+#define SNB_GMCH_CTRL	0x50
+#define SNB_GMCH_GMS_STOLEN_MASK	0xF8
+#define SNB_GMCH_GMS_STOLEN_32M		(1 << 3)
+#define SNB_GMCH_GMS_STOLEN_64M		(2 << 3)
+#define SNB_GMCH_GMS_STOLEN_96M		(3 << 3)
+#define SNB_GMCH_GMS_STOLEN_128M	(4 << 3)
+#define SNB_GMCH_GMS_STOLEN_160M	(5 << 3)
+#define SNB_GMCH_GMS_STOLEN_192M	(6 << 3)
+#define SNB_GMCH_GMS_STOLEN_224M	(7 << 3)
+#define SNB_GMCH_GMS_STOLEN_256M	(8 << 3)
+#define SNB_GMCH_GMS_STOLEN_288M	(9 << 3)
+#define SNB_GMCH_GMS_STOLEN_320M	(0xa << 3)
+#define SNB_GMCH_GMS_STOLEN_352M	(0xb << 3)
+#define SNB_GMCH_GMS_STOLEN_384M	(0xc << 3)
+#define SNB_GMCH_GMS_STOLEN_416M	(0xd << 3)
+#define SNB_GMCH_GMS_STOLEN_448M	(0xe << 3)
+#define SNB_GMCH_GMS_STOLEN_480M	(0xf << 3)
+#define SNB_GMCH_GMS_STOLEN_512M	(0x10 << 3)
+
 static const struct aper_size_info_fixed intel_i810_sizes[] =
 {
 	{64, 16384, 4},
@@ -294,6 +322,13 @@
 					off_t pg_start, int mask_type)
 {
 	int i, j;
+	u32 cache_bits = 0;
+
+	if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB ||
+	    agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB)
+	{
+		cache_bits = I830_PTE_SYSTEM_CACHED;
+	}
 
 	for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
 		writel(agp_bridge->driver->mask_memory(agp_bridge,
@@ -614,7 +649,7 @@
 static void intel_i830_init_gtt_entries(void)
 {
 	u16 gmch_ctrl;
-	int gtt_entries;
+	int gtt_entries = 0;
 	u8 rdct;
 	int local = 0;
 	static const int ddt[4] = { 0, 16, 32, 64 };
@@ -706,6 +741,63 @@
 			gtt_entries = 0;
 			break;
 		}
+	} else if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB ||
+		   agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) {
+		/*
+		 * SandyBridge has new memory control reg at 0x50.w
+		 */
+		u16 snb_gmch_ctl;
+		pci_read_config_word(intel_private.pcidev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+		switch (snb_gmch_ctl & SNB_GMCH_GMS_STOLEN_MASK) {
+		case SNB_GMCH_GMS_STOLEN_32M:
+			gtt_entries = MB(32) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_64M:
+			gtt_entries = MB(64) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_96M:
+			gtt_entries = MB(96) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_128M:
+			gtt_entries = MB(128) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_160M:
+			gtt_entries = MB(160) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_192M:
+			gtt_entries = MB(192) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_224M:
+			gtt_entries = MB(224) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_256M:
+			gtt_entries = MB(256) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_288M:
+			gtt_entries = MB(288) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_320M:
+			gtt_entries = MB(320) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_352M:
+			gtt_entries = MB(352) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_384M:
+			gtt_entries = MB(384) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_416M:
+			gtt_entries = MB(416) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_448M:
+			gtt_entries = MB(448) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_480M:
+			gtt_entries = MB(480) - KB(size);
+			break;
+		case SNB_GMCH_GMS_STOLEN_512M:
+			gtt_entries = MB(512) - KB(size);
+			break;
+		}
 	} else {
 		switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
 		case I855_GMCH_GMS_STOLEN_1M:
@@ -1357,6 +1449,8 @@
 	case PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB:
 	case PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB:
 	case PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB:
+	case PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB:
+	case PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB:
 		*gtt_offset = *gtt_size = MB(2);
 		break;
 	default:
@@ -2338,9 +2432,9 @@
 		NULL, &intel_g33_driver },
 	{ PCI_DEVICE_ID_INTEL_Q33_HB, PCI_DEVICE_ID_INTEL_Q33_IG, 0, "Q33",
 		NULL, &intel_g33_driver },
-	{ PCI_DEVICE_ID_INTEL_PINEVIEW_M_HB, PCI_DEVICE_ID_INTEL_PINEVIEW_M_IG, 0, "Pineview",
+	{ PCI_DEVICE_ID_INTEL_PINEVIEW_M_HB, PCI_DEVICE_ID_INTEL_PINEVIEW_M_IG, 0, "GMA3150",
 		NULL, &intel_g33_driver },
-	{ PCI_DEVICE_ID_INTEL_PINEVIEW_HB, PCI_DEVICE_ID_INTEL_PINEVIEW_IG, 0, "Pineview",
+	{ PCI_DEVICE_ID_INTEL_PINEVIEW_HB, PCI_DEVICE_ID_INTEL_PINEVIEW_IG, 0, "GMA3150",
 		NULL, &intel_g33_driver },
 	{ PCI_DEVICE_ID_INTEL_GM45_HB, PCI_DEVICE_ID_INTEL_GM45_IG, 0,
 	    "GM45", NULL, &intel_i965_driver },
@@ -2355,13 +2449,17 @@
 	{ PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
 	    "G41", NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG, 0,
-	    "Ironlake/D", NULL, &intel_i965_driver },
+	    "HD Graphics", NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG, 0,
-	    "Ironlake/M", NULL, &intel_i965_driver },
+	    "HD Graphics", NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG, 0,
-	    "Ironlake/MA", NULL, &intel_i965_driver },
+	    "HD Graphics", NULL, &intel_i965_driver },
 	{ PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG, 0,
-	    "Ironlake/MC2", NULL, &intel_i965_driver },
+	    "HD Graphics", NULL, &intel_i965_driver },
+	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_IG, 0,
+	    "Sandybridge", NULL, &intel_i965_driver },
+	{ PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_IG, 0,
+	    "Sandybridge", NULL, &intel_i965_driver },
 	{ 0, 0, 0, NULL, NULL, NULL }
 };
 
@@ -2371,7 +2469,7 @@
 	struct agp_bridge_data *bridge;
 	u8 cap_ptr = 0;
 	struct resource *r;
-	int i;
+	int i, err;
 
 	cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
 
@@ -2463,7 +2561,10 @@
 	}
 
 	pci_set_drvdata(pdev, bridge);
-	return agp_add_bridge(bridge);
+	err = agp_add_bridge(bridge);
+	if (!err)
+		intel_agp_enabled = 1;
+	return err;
 }
 
 static void __devexit agp_intel_remove(struct pci_dev *pdev)
@@ -2568,6 +2669,8 @@
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB),
 	ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB),
+	ID(PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB),
+	ID(PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB),
 	{ }
 };
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index e02d74b..c27f80e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -13,6 +13,22 @@
 	  DMA Device drivers supported by the configured arch, it may
 	  be empty in some cases.
 
+config DMADEVICES_DEBUG
+        bool "DMA Engine debugging"
+        depends on DMADEVICES != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables DMA engine core and driver debugging.
+
+config DMADEVICES_VDEBUG
+        bool "DMA Engine verbose debugging"
+        depends on DMADEVICES_DEBUG != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables deeper (more verbose) debugging of
+          the DMA engine core and drivers.
+
+
 if DMADEVICES
 
 comment "DMA Devices"
@@ -69,6 +85,13 @@
 	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
 	  Elo Plus is the DMA controller on 85xx and 86xx parts.
 
+config MPC512X_DMA
+	tristate "Freescale MPC512x built-in DMA engine support"
+	depends on PPC_MPC512x
+	select DMA_ENGINE
+	---help---
+	  Enable support for the Freescale MPC512x built-in DMA engine.
+
 config MV_XOR
 	bool "Marvell XOR engine support"
 	depends on PLAT_ORION
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 807053d..22bba3d 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,9 +1,17 @@
+ifeq ($(CONFIG_DMADEVICES_DEBUG),y)
+	EXTRA_CFLAGS	+= -DDEBUG
+endif
+ifeq ($(CONFIG_DMADEVICES_VDEBUG),y)
+	EXTRA_CFLAGS	+= -DVERBOSE_DEBUG
+endif
+
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
 obj-$(CONFIG_MV_XOR) += mv_xor.o
 obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 64a9372..1656fdc 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -39,7 +39,6 @@
 	unsigned int sg_len;
 	struct coh901318_lli *data;
 	enum dma_data_direction dir;
-	int pending_irqs;
 	unsigned long flags;
 };
 
@@ -72,7 +71,6 @@
 
 	unsigned long nbr_active_done;
 	unsigned long busy;
-	int pending_irqs;
 
 	struct coh901318_base *base;
 };
@@ -80,18 +78,16 @@
 static void coh901318_list_print(struct coh901318_chan *cohc,
 				 struct coh901318_lli *lli)
 {
-	struct coh901318_lli *l;
-	dma_addr_t addr =  virt_to_phys(lli);
+	struct coh901318_lli *l = lli;
 	int i = 0;
 
-	while (addr) {
-		l = phys_to_virt(addr);
+	while (l) {
 		dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src 0x%x"
-			 ", dst 0x%x, link 0x%x link_virt 0x%p\n",
+			 ", dst 0x%x, link 0x%x virt_link_addr 0x%p\n",
 			 i, l, l->control, l->src_addr, l->dst_addr,
-			 l->link_addr, phys_to_virt(l->link_addr));
+			 l->link_addr, l->virt_link_addr);
 		i++;
-		addr = l->link_addr;
+		l = l->virt_link_addr;
 	}
 }
 
@@ -125,7 +121,7 @@
 		goto err_kmalloc;
 	tmp = dev_buf;
 
-	tmp += sprintf(tmp, "DMA -- enable dma channels\n");
+	tmp += sprintf(tmp, "DMA -- enabled dma channels\n");
 
 	for (i = 0; i < debugfs_dma_base->platform->max_channels; i++)
 		if (started_channels & (1 << i))
@@ -337,16 +333,22 @@
 		 * TODO: alloc a pile of descs instead of just one,
 		 * avoid many small allocations.
 		 */
-		desc = kmalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
+		desc = kzalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
 		if (desc == NULL)
 			goto out;
 		INIT_LIST_HEAD(&desc->node);
+		dma_async_tx_descriptor_init(&desc->desc, &cohc->chan);
 	} else {
 		/* Reuse an old desc. */
 		desc = list_first_entry(&cohc->free,
 					struct coh901318_desc,
 					node);
 		list_del(&desc->node);
+		/* Initialize it a bit so it's not insane */
+		desc->sg = NULL;
+		desc->sg_len = 0;
+		desc->desc.callback = NULL;
+		desc->desc.callback_param = NULL;
 	}
 
  out:
@@ -364,10 +366,6 @@
 coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc)
 {
 	list_add_tail(&desc->node, &cohc->active);
-
-	BUG_ON(cohc->pending_irqs != 0);
-
-	cohc->pending_irqs = desc->pending_irqs;
 }
 
 static struct coh901318_desc *
@@ -592,6 +590,10 @@
 	return cohd_que;
 }
 
+/*
+ * This tasklet is called from the interrupt handler to
+ * handle each descriptor (DMA job) that is sent to a channel.
+ */
 static void dma_tasklet(unsigned long data)
 {
 	struct coh901318_chan *cohc = (struct coh901318_chan *) data;
@@ -600,55 +602,58 @@
 	dma_async_tx_callback callback;
 	void *callback_param;
 
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d"
+		 " nbr_active_done %ld\n", __func__,
+		 cohc->id, cohc->nbr_active_done);
+
 	spin_lock_irqsave(&cohc->lock, flags);
 
-	/* get first active entry from list */
+	/* get first active descriptor entry from list */
 	cohd_fin = coh901318_first_active_get(cohc);
 
-	BUG_ON(cohd_fin->pending_irqs == 0);
-
 	if (cohd_fin == NULL)
 		goto err;
 
-	cohd_fin->pending_irqs--;
-	cohc->completed = cohd_fin->desc.cookie;
-
-	if (cohc->nbr_active_done == 0)
-		return;
-
-	if (!cohd_fin->pending_irqs) {
-		/* release the lli allocation*/
-		coh901318_lli_free(&cohc->base->pool, &cohd_fin->data);
-	}
-
-	dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d pending_irqs %d"
-		 " nbr_active_done %ld\n", __func__,
-		 cohc->id, cohc->pending_irqs, cohc->nbr_active_done);
-
-	/* callback to client */
+	/* locate callback to client */
 	callback = cohd_fin->desc.callback;
 	callback_param = cohd_fin->desc.callback_param;
 
-	if (!cohd_fin->pending_irqs) {
-		coh901318_desc_remove(cohd_fin);
+	/* sign this job as completed on the channel */
+	cohc->completed = cohd_fin->desc.cookie;
 
-		/* return desc to free-list */
-		coh901318_desc_free(cohc, cohd_fin);
-	}
+	/* release the lli allocation and remove the descriptor */
+	coh901318_lli_free(&cohc->base->pool, &cohd_fin->data);
 
-	if (cohc->nbr_active_done)
-		cohc->nbr_active_done--;
+	/* return desc to free-list */
+	coh901318_desc_remove(cohd_fin);
+	coh901318_desc_free(cohc, cohd_fin);
 
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	/* Call the callback when we're done */
+	if (callback)
+		callback(callback_param);
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * If another interrupt fired while the tasklet was scheduling,
+	 * we don't get called twice, so we have this number of active
+	 * counter that keep track of the number of IRQs expected to
+	 * be handled for this channel. If there happen to be more than
+	 * one IRQ to be ack:ed, we simply schedule this tasklet again.
+	 */
+	cohc->nbr_active_done--;
 	if (cohc->nbr_active_done) {
+		dev_dbg(COHC_2_DEV(cohc), "scheduling tasklet again, new IRQs "
+			"came in while we were scheduling this tasklet\n");
 		if (cohc_chan_conf(cohc)->priority_high)
 			tasklet_hi_schedule(&cohc->tasklet);
 		else
 			tasklet_schedule(&cohc->tasklet);
 	}
-	spin_unlock_irqrestore(&cohc->lock, flags);
 
-	if (callback)
-		callback(callback_param);
+	spin_unlock_irqrestore(&cohc->lock, flags);
 
 	return;
 
@@ -667,16 +672,17 @@
 	if (!cohc->allocated)
 		return;
 
-	BUG_ON(cohc->pending_irqs == 0);
+	spin_lock(&cohc->lock);
 
-	cohc->pending_irqs--;
 	cohc->nbr_active_done++;
 
-	if (cohc->pending_irqs == 0 && coh901318_queue_start(cohc) == NULL)
+	if (coh901318_queue_start(cohc) == NULL)
 		cohc->busy = 0;
 
 	BUG_ON(list_empty(&cohc->active));
 
+	spin_unlock(&cohc->lock);
+
 	if (cohc_chan_conf(cohc)->priority_high)
 		tasklet_hi_schedule(&cohc->tasklet);
 	else
@@ -870,6 +876,7 @@
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
 	int lli_len;
 	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	int ret;
 
 	spin_lock_irqsave(&cohc->lock, flg);
 
@@ -890,22 +897,19 @@
 	if (data == NULL)
 		goto err;
 
-	cohd = coh901318_desc_get(cohc);
-	cohd->sg = NULL;
-	cohd->sg_len = 0;
-	cohd->data = data;
-
-	cohd->pending_irqs =
-		coh901318_lli_fill_memcpy(
-				&cohc->base->pool, data, src, size, dest,
-				cohc_chan_param(cohc)->ctrl_lli_chained,
-				ctrl_last);
-	cohd->flags = flags;
+	ret = coh901318_lli_fill_memcpy(
+		&cohc->base->pool, data, src, size, dest,
+		cohc_chan_param(cohc)->ctrl_lli_chained,
+		ctrl_last);
+	if (ret)
+		goto err;
 
 	COH_DBG(coh901318_list_print(cohc, data));
 
-	dma_async_tx_descriptor_init(&cohd->desc, chan);
-
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->data = data;
+	cohd->flags = flags;
 	cohd->desc.tx_submit = coh901318_tx_submit;
 
 	spin_unlock_irqrestore(&cohc->lock, flg);
@@ -924,6 +928,7 @@
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
 	struct coh901318_lli *data;
 	struct coh901318_desc *cohd;
+	const struct coh901318_params *params;
 	struct scatterlist *sg;
 	int len = 0;
 	int size;
@@ -931,7 +936,9 @@
 	u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained;
 	u32 ctrl = cohc_chan_param(cohc)->ctrl_lli;
 	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	u32 config;
 	unsigned long flg;
+	int ret;
 
 	if (!sgl)
 		goto out;
@@ -947,15 +954,14 @@
 		/* Trigger interrupt after last lli */
 		ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
 
-	cohd = coh901318_desc_get(cohc);
-	cohd->sg = NULL;
-	cohd->sg_len = 0;
-	cohd->dir = direction;
+	params = cohc_chan_param(cohc);
+	config = params->config;
 
 	if (direction == DMA_TO_DEVICE) {
 		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
 			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
 
+		config |= COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY;
 		ctrl_chained |= tx_flags;
 		ctrl_last |= tx_flags;
 		ctrl |= tx_flags;
@@ -963,16 +969,14 @@
 		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
 			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
 
+		config |= COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY;
 		ctrl_chained |= rx_flags;
 		ctrl_last |= rx_flags;
 		ctrl |= rx_flags;
 	} else
 		goto err_direction;
 
-	dma_async_tx_descriptor_init(&cohd->desc, chan);
-
-	cohd->desc.tx_submit = coh901318_tx_submit;
-
+	coh901318_set_conf(cohc, config);
 
 	/* The dma only supports transmitting packages up to
 	 * MAX_DMA_PACKET_SIZE. Calculate to total number of
@@ -994,32 +998,37 @@
 		len += factor;
 	}
 
+	pr_debug("Allocate %d lli:s for this transfer\n", len);
 	data = coh901318_lli_alloc(&cohc->base->pool, len);
 
 	if (data == NULL)
 		goto err_dma_alloc;
 
 	/* initiate allocated data list */
-	cohd->pending_irqs =
-		coh901318_lli_fill_sg(&cohc->base->pool, data, sgl, sg_len,
-				      cohc_dev_addr(cohc),
-				      ctrl_chained,
-				      ctrl,
-				      ctrl_last,
-				      direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
-	cohd->data = data;
-
-	cohd->flags = flags;
+	ret = coh901318_lli_fill_sg(&cohc->base->pool, data, sgl, sg_len,
+				    cohc_dev_addr(cohc),
+				    ctrl_chained,
+				    ctrl,
+				    ctrl_last,
+				    direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
+	if (ret)
+		goto err_lli_fill;
 
 	COH_DBG(coh901318_list_print(cohc, data));
 
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->dir = direction;
+	cohd->flags = flags;
+	cohd->desc.tx_submit = coh901318_tx_submit;
+	cohd->data = data;
+
 	spin_unlock_irqrestore(&cohc->lock, flg);
 
 	return &cohd->desc;
+ err_lli_fill:
  err_dma_alloc:
  err_direction:
-	coh901318_desc_remove(cohd);
-	coh901318_desc_free(cohc, cohd);
 	spin_unlock_irqrestore(&cohc->lock, flg);
  out:
 	return NULL;
@@ -1092,9 +1101,8 @@
 		/* release the lli allocation*/
 		coh901318_lli_free(&cohc->base->pool, &cohd->data);
 
-		coh901318_desc_remove(cohd);
-
 		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
 		coh901318_desc_free(cohc, cohd);
 	}
 
@@ -1102,16 +1110,14 @@
 		/* release the lli allocation*/
 		coh901318_lli_free(&cohc->base->pool, &cohd->data);
 
-		coh901318_desc_remove(cohd);
-
 		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
 		coh901318_desc_free(cohc, cohd);
 	}
 
 
 	cohc->nbr_active_done = 0;
 	cohc->busy = 0;
-	cohc->pending_irqs = 0;
 
 	spin_unlock_irqrestore(&cohc->lock, flags);
 }
@@ -1138,7 +1144,6 @@
 
 			spin_lock_init(&cohc->lock);
 
-			cohc->pending_irqs = 0;
 			cohc->nbr_active_done = 0;
 			cohc->busy = 0;
 			INIT_LIST_HEAD(&cohc->free);
@@ -1254,12 +1259,17 @@
 	base->dma_memcpy.device_issue_pending = coh901318_issue_pending;
 	base->dma_memcpy.device_terminate_all = coh901318_terminate_all;
 	base->dma_memcpy.dev = &pdev->dev;
+	/*
+	 * This controller can only access address at even 32bit boundaries,
+	 * i.e. 2^2
+	 */
+	base->dma_memcpy.copy_align = 2;
 	err = dma_async_device_register(&base->dma_memcpy);
 
 	if (err)
 		goto err_register_memcpy;
 
-	dev_dbg(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n",
+	dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%08x\n",
 		(u32) base->virtbase);
 
 	return err;
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
index f5120f2..71d58c1 100644
--- a/drivers/dma/coh901318_lli.c
+++ b/drivers/dma/coh901318_lli.c
@@ -74,6 +74,8 @@
 
 	lli = head;
 	lli->phy_this = phy;
+	lli->link_addr = 0x00000000;
+	lli->virt_link_addr = 0x00000000U;
 
 	for (i = 1; i < len; i++) {
 		lli_prev = lli;
@@ -85,13 +87,13 @@
 
 		DEBUGFS_POOL_COUNTER_ADD(pool, 1);
 		lli->phy_this = phy;
+		lli->link_addr = 0x00000000;
+		lli->virt_link_addr = 0x00000000U;
 
 		lli_prev->link_addr = phy;
 		lli_prev->virt_link_addr = lli;
 	}
 
-	lli->link_addr = 0x00000000U;
-
 	spin_unlock(&pool->lock);
 
 	return head;
@@ -166,8 +168,7 @@
 	lli->src_addr = src;
 	lli->dst_addr = dst;
 
-	/* One irq per single transfer */
-	return 1;
+	return 0;
 }
 
 int
@@ -223,8 +224,7 @@
 	lli->src_addr = src;
 	lli->dst_addr = dst;
 
-	/* One irq per single transfer */
-	return 1;
+	return 0;
 }
 
 int
@@ -240,7 +240,6 @@
 	u32 ctrl_sg;
 	dma_addr_t src = 0;
 	dma_addr_t dst = 0;
-	int nbr_of_irq = 0;
 	u32 bytes_to_transfer;
 	u32 elem_size;
 
@@ -269,15 +268,12 @@
 			ctrl_sg = ctrl ? ctrl : ctrl_last;
 
 
-		if ((ctrl_sg & ctrl_irq_mask))
-			nbr_of_irq++;
-
 		if (dir == DMA_TO_DEVICE)
 			/* increment source address */
-			src = sg_dma_address(sg);
+			src = sg_phys(sg);
 		else
 			/* increment destination address */
-			dst =  sg_dma_address(sg);
+			dst =  sg_phys(sg);
 
 		bytes_to_transfer = sg_dma_len(sg);
 
@@ -310,8 +306,7 @@
 	}
 	spin_unlock(&pool->lock);
 
-	/* There can be many IRQs per sg transfer */
-	return nbr_of_irq;
+	return 0;
  err:
 	spin_unlock(&pool->lock);
 	return -EINVAL;
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 948d563..6fa55fe 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -237,7 +237,7 @@
 	dma_cookie_t		cookie;
 	enum dma_status		status;
 	enum dma_ctrl_flags 	flags;
-	u8			pq_coefs[pq_sources];
+	u8			pq_coefs[pq_sources + 1];
 	int			ret;
 	int			src_cnt;
 	int			dst_cnt;
@@ -257,7 +257,7 @@
 	} else if (thread->type == DMA_PQ) {
 		src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
 		dst_cnt = 2;
-		for (i = 0; i < pq_sources; i++)
+		for (i = 0; i < src_cnt; i++)
 			pq_coefs[i] = 1;
 	} else
 		goto err_srcs;
@@ -347,7 +347,7 @@
 		else if (thread->type == DMA_XOR)
 			tx = dev->device_prep_dma_xor(chan,
 						      dma_dsts[0] + dst_off,
-						      dma_srcs, xor_sources,
+						      dma_srcs, src_cnt,
 						      len, flags);
 		else if (thread->type == DMA_PQ) {
 			dma_addr_t dma_pq[dst_cnt];
@@ -355,7 +355,7 @@
 			for (i = 0; i < dst_cnt; i++)
 				dma_pq[i] = dma_dsts[i] + dst_off;
 			tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
-						     pq_sources, pq_coefs,
+						     src_cnt, pq_coefs,
 						     len, flags);
 		}
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 296f9e7..bbb4be5 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -37,19 +37,19 @@
 #include <asm/fsldma.h>
 #include "fsldma.h"
 
-static void dma_init(struct fsl_dma_chan *fsl_chan)
+static void dma_init(struct fsldma_chan *chan)
 {
 	/* Reset the channel */
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 0, 32);
+	DMA_OUT(chan, &chan->regs->mr, 0, 32);
 
-	switch (fsl_chan->feature & FSL_DMA_IP_MASK) {
+	switch (chan->feature & FSL_DMA_IP_MASK) {
 	case FSL_DMA_IP_85XX:
 		/* Set the channel to below modes:
 		 * EIE - Error interrupt enable
 		 * EOSIE - End of segments interrupt enable (basic mode)
 		 * EOLNIE - End of links interrupt enable
 		 */
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EIE
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EIE
 				| FSL_DMA_MR_EOLNIE | FSL_DMA_MR_EOSIE, 32);
 		break;
 	case FSL_DMA_IP_83XX:
@@ -57,170 +57,146 @@
 		 * EOTIE - End-of-transfer interrupt enable
 		 * PRC_RM - PCI read multiple
 		 */
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE
+		DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE
 				| FSL_DMA_MR_PRC_RM, 32);
 		break;
 	}
-
 }
 
-static void set_sr(struct fsl_dma_chan *fsl_chan, u32 val)
+static void set_sr(struct fsldma_chan *chan, u32 val)
 {
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->sr, val, 32);
+	DMA_OUT(chan, &chan->regs->sr, val, 32);
 }
 
-static u32 get_sr(struct fsl_dma_chan *fsl_chan)
+static u32 get_sr(struct fsldma_chan *chan)
 {
-	return DMA_IN(fsl_chan, &fsl_chan->reg_base->sr, 32);
+	return DMA_IN(chan, &chan->regs->sr, 32);
 }
 
-static void set_desc_cnt(struct fsl_dma_chan *fsl_chan,
+static void set_desc_cnt(struct fsldma_chan *chan,
 				struct fsl_dma_ld_hw *hw, u32 count)
 {
-	hw->count = CPU_TO_DMA(fsl_chan, count, 32);
+	hw->count = CPU_TO_DMA(chan, count, 32);
 }
 
-static void set_desc_src(struct fsl_dma_chan *fsl_chan,
+static void set_desc_src(struct fsldma_chan *chan,
 				struct fsl_dma_ld_hw *hw, dma_addr_t src)
 {
 	u64 snoop_bits;
 
-	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
 		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
-	hw->src_addr = CPU_TO_DMA(fsl_chan, snoop_bits | src, 64);
+	hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64);
 }
 
-static void set_desc_dest(struct fsl_dma_chan *fsl_chan,
-				struct fsl_dma_ld_hw *hw, dma_addr_t dest)
+static void set_desc_dst(struct fsldma_chan *chan,
+				struct fsl_dma_ld_hw *hw, dma_addr_t dst)
 {
 	u64 snoop_bits;
 
-	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
 		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
-	hw->dst_addr = CPU_TO_DMA(fsl_chan, snoop_bits | dest, 64);
+	hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64);
 }
 
-static void set_desc_next(struct fsl_dma_chan *fsl_chan,
+static void set_desc_next(struct fsldma_chan *chan,
 				struct fsl_dma_ld_hw *hw, dma_addr_t next)
 {
 	u64 snoop_bits;
 
-	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
 		? FSL_DMA_SNEN : 0;
-	hw->next_ln_addr = CPU_TO_DMA(fsl_chan, snoop_bits | next, 64);
+	hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64);
 }
 
-static void set_cdar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
 {
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->cdar, addr | FSL_DMA_SNEN, 64);
+	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
 }
 
-static dma_addr_t get_cdar(struct fsl_dma_chan *fsl_chan)
+static dma_addr_t get_cdar(struct fsldma_chan *chan)
 {
-	return DMA_IN(fsl_chan, &fsl_chan->reg_base->cdar, 64) & ~FSL_DMA_SNEN;
+	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
 }
 
-static void set_ndar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+static dma_addr_t get_ndar(struct fsldma_chan *chan)
 {
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->ndar, addr, 64);
+	return DMA_IN(chan, &chan->regs->ndar, 64);
 }
 
-static dma_addr_t get_ndar(struct fsl_dma_chan *fsl_chan)
+static u32 get_bcr(struct fsldma_chan *chan)
 {
-	return DMA_IN(fsl_chan, &fsl_chan->reg_base->ndar, 64);
+	return DMA_IN(chan, &chan->regs->bcr, 32);
 }
 
-static u32 get_bcr(struct fsl_dma_chan *fsl_chan)
+static int dma_is_idle(struct fsldma_chan *chan)
 {
-	return DMA_IN(fsl_chan, &fsl_chan->reg_base->bcr, 32);
-}
-
-static int dma_is_idle(struct fsl_dma_chan *fsl_chan)
-{
-	u32 sr = get_sr(fsl_chan);
+	u32 sr = get_sr(chan);
 	return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
 }
 
-static void dma_start(struct fsl_dma_chan *fsl_chan)
+static void dma_start(struct fsldma_chan *chan)
 {
-	u32 mr_set = 0;
+	u32 mode;
 
-	if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32);
-		mr_set |= FSL_DMA_MR_EMP_EN;
-	} else if ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
-				& ~FSL_DMA_MR_EMP_EN, 32);
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+			DMA_OUT(chan, &chan->regs->bcr, 0, 32);
+			mode |= FSL_DMA_MR_EMP_EN;
+		} else {
+			mode &= ~FSL_DMA_MR_EMP_EN;
+		}
 	}
 
-	if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT)
-		mr_set |= FSL_DMA_MR_EMS_EN;
+	if (chan->feature & FSL_DMA_CHAN_START_EXT)
+		mode |= FSL_DMA_MR_EMS_EN;
 	else
-		mr_set |= FSL_DMA_MR_CS;
+		mode |= FSL_DMA_MR_CS;
 
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
-			| mr_set, 32);
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 }
 
-static void dma_halt(struct fsl_dma_chan *fsl_chan)
+static void dma_halt(struct fsldma_chan *chan)
 {
+	u32 mode;
 	int i;
 
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-		DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | FSL_DMA_MR_CA,
-		32);
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-		DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~(FSL_DMA_MR_CS
-		| FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA), 32);
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode |= FSL_DMA_MR_CA;
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
+
+	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA);
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 
 	for (i = 0; i < 100; i++) {
-		if (dma_is_idle(fsl_chan))
-			break;
+		if (dma_is_idle(chan))
+			return;
+
 		udelay(10);
 	}
-	if (i >= 100 && !dma_is_idle(fsl_chan))
-		dev_err(fsl_chan->dev, "DMA halt timeout!\n");
+
+	if (!dma_is_idle(chan))
+		dev_err(chan->dev, "DMA halt timeout!\n");
 }
 
-static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
+static void set_ld_eol(struct fsldma_chan *chan,
 			struct fsl_desc_sw *desc)
 {
 	u64 snoop_bits;
 
-	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
 		? FSL_DMA_SNEN : 0;
 
-	desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
-		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+	desc->hw.next_ln_addr = CPU_TO_DMA(chan,
+		DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
 			| snoop_bits, 64);
 }
 
-static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
-		struct fsl_desc_sw *new_desc)
-{
-	struct fsl_desc_sw *queue_tail = to_fsl_desc(fsl_chan->ld_queue.prev);
-
-	if (list_empty(&fsl_chan->ld_queue))
-		return;
-
-	/* Link to the new descriptor physical address and
-	 * Enable End-of-segment interrupt for
-	 * the last link descriptor.
-	 * (the previous node's next link descriptor)
-	 *
-	 * For FSL_DMA_IP_83xx, the snoop enable bit need be set.
-	 */
-	queue_tail->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
-			new_desc->async_tx.phys | FSL_DMA_EOSIE |
-			(((fsl_chan->feature & FSL_DMA_IP_MASK)
-				== FSL_DMA_IP_83XX) ? FSL_DMA_SNEN : 0), 64);
-}
-
 /**
  * fsl_chan_set_src_loop_size - Set source address hold transfer size
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  * @size     : Address loop size, 0 for disable loop
  *
  * The set source address hold transfer size. The source
@@ -229,29 +205,30 @@
  * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
  * SA + 1 ... and so on.
  */
-static void fsl_chan_set_src_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size)
 {
+	u32 mode;
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
 	switch (size) {
 	case 0:
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
-			(~FSL_DMA_MR_SAHE), 32);
+		mode &= ~FSL_DMA_MR_SAHE;
 		break;
 	case 1:
 	case 2:
 	case 4:
 	case 8:
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
-			FSL_DMA_MR_SAHE | (__ilog2(size) << 14),
-			32);
+		mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14);
 		break;
 	}
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 }
 
 /**
- * fsl_chan_set_dest_loop_size - Set destination address hold transfer size
- * @fsl_chan : Freescale DMA channel
+ * fsl_chan_set_dst_loop_size - Set destination address hold transfer size
+ * @chan : Freescale DMA channel
  * @size     : Address loop size, 0 for disable loop
  *
  * The set destination address hold transfer size. The destination
@@ -260,29 +237,30 @@
  * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
  * TA + 1 ... and so on.
  */
-static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size)
 {
+	u32 mode;
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+
 	switch (size) {
 	case 0:
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
-			(~FSL_DMA_MR_DAHE), 32);
+		mode &= ~FSL_DMA_MR_DAHE;
 		break;
 	case 1:
 	case 2:
 	case 4:
 	case 8:
-		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
-			FSL_DMA_MR_DAHE | (__ilog2(size) << 16),
-			32);
+		mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16);
 		break;
 	}
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 }
 
 /**
  * fsl_chan_set_request_count - Set DMA Request Count for external control
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  * @size     : Number of bytes to transfer in a single request
  *
  * The Freescale DMA channel can be controlled by the external signal DREQ#.
@@ -292,35 +270,38 @@
  *
  * A size of 0 disables external pause control. The maximum size is 1024.
  */
-static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size)
 {
+	u32 mode;
+
 	BUG_ON(size > 1024);
-	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-		DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
-			| ((__ilog2(size) << 24) & 0x0f000000),
-		32);
+
+	mode = DMA_IN(chan, &chan->regs->mr, 32);
+	mode |= (__ilog2(size) << 24) & 0x0f000000;
+
+	DMA_OUT(chan, &chan->regs->mr, mode, 32);
 }
 
 /**
  * fsl_chan_toggle_ext_pause - Toggle channel external pause status
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  * @enable   : 0 is disabled, 1 is enabled.
  *
  * The Freescale DMA channel can be controlled by the external signal DREQ#.
  * The DMA Request Count feature should be used in addition to this feature
  * to set the number of bytes to transfer before pausing the channel.
  */
-static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable)
 {
 	if (enable)
-		fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+		chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
 	else
-		fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+		chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
 }
 
 /**
  * fsl_chan_toggle_ext_start - Toggle channel external start status
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  * @enable   : 0 is disabled, 1 is enabled.
  *
  * If enable the external start, the channel can be started by an
@@ -328,141 +309,196 @@
  * transfer immediately. The DMA channel will wait for the
  * control pin asserted.
  */
-static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
+static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
 {
 	if (enable)
-		fsl_chan->feature |= FSL_DMA_CHAN_START_EXT;
+		chan->feature |= FSL_DMA_CHAN_START_EXT;
 	else
-		fsl_chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+		chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+static void append_ld_queue(struct fsldma_chan *chan,
+			    struct fsl_desc_sw *desc)
+{
+	struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
+
+	if (list_empty(&chan->ld_pending))
+		goto out_splice;
+
+	/*
+	 * Add the hardware descriptor to the chain of hardware descriptors
+	 * that already exists in memory.
+	 *
+	 * This will un-set the EOL bit of the existing transaction, and the
+	 * last link in this transaction will become the EOL descriptor.
+	 */
+	set_desc_next(chan, &tail->hw, desc->async_tx.phys);
+
+	/*
+	 * Add the software descriptor and all children to the list
+	 * of pending transactions
+	 */
+out_splice:
+	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
 }
 
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
-	struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
 	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_desc_sw *child;
 	unsigned long flags;
 	dma_cookie_t cookie;
 
-	/* cookie increment and adding to ld_queue must be atomic */
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	cookie = fsl_chan->common.cookie;
+	/*
+	 * assign cookies to all of the software descriptors
+	 * that make up this transaction
+	 */
+	cookie = chan->common.cookie;
 	list_for_each_entry(child, &desc->tx_list, node) {
 		cookie++;
 		if (cookie < 0)
 			cookie = 1;
 
-		desc->async_tx.cookie = cookie;
+		child->async_tx.cookie = cookie;
 	}
 
-	fsl_chan->common.cookie = cookie;
-	append_ld_queue(fsl_chan, desc);
-	list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
+	chan->common.cookie = cookie;
 
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+	/* put this transaction onto the tail of the pending queue */
+	append_ld_queue(chan, desc);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
 	return cookie;
 }
 
 /**
  * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  *
  * Return - The descriptor allocated. NULL for failed.
  */
 static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
-					struct fsl_dma_chan *fsl_chan)
+					struct fsldma_chan *chan)
 {
+	struct fsl_desc_sw *desc;
 	dma_addr_t pdesc;
-	struct fsl_desc_sw *desc_sw;
 
-	desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
-	if (desc_sw) {
-		memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
-		INIT_LIST_HEAD(&desc_sw->tx_list);
-		dma_async_tx_descriptor_init(&desc_sw->async_tx,
-						&fsl_chan->common);
-		desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
-		desc_sw->async_tx.phys = pdesc;
+	desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		dev_dbg(chan->dev, "out of memory for link desc\n");
+		return NULL;
 	}
 
-	return desc_sw;
+	memset(desc, 0, sizeof(*desc));
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = fsl_dma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+	return desc;
 }
 
 
 /**
  * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  *
  * This function will create a dma pool for descriptor allocation.
  *
  * Return - The number of descriptors allocated.
  */
-static int fsl_dma_alloc_chan_resources(struct dma_chan *chan)
+static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan)
 {
-	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
 
 	/* Has this channel already been allocated? */
-	if (fsl_chan->desc_pool)
+	if (chan->desc_pool)
 		return 1;
 
-	/* We need the descriptor to be aligned to 32bytes
+	/*
+	 * We need the descriptor to be aligned to 32bytes
 	 * for meeting FSL DMA specification requirement.
 	 */
-	fsl_chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
-			fsl_chan->dev, sizeof(struct fsl_desc_sw),
-			32, 0);
-	if (!fsl_chan->desc_pool) {
-		dev_err(fsl_chan->dev, "No memory for channel %d "
-			"descriptor dma pool.\n", fsl_chan->id);
-		return 0;
+	chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
+					  chan->dev,
+					  sizeof(struct fsl_desc_sw),
+					  __alignof__(struct fsl_desc_sw), 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev, "unable to allocate channel %d "
+				   "descriptor pool\n", chan->id);
+		return -ENOMEM;
 	}
 
+	/* there is at least one descriptor free to be allocated */
 	return 1;
 }
 
 /**
- * fsl_dma_free_chan_resources - Free all resources of the channel.
- * @fsl_chan : Freescale DMA channel
+ * fsldma_free_desc_list - Free all descriptors in a queue
+ * @chan: Freescae DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
  */
-static void fsl_dma_free_chan_resources(struct dma_chan *chan)
+static void fsldma_free_desc_list(struct fsldma_chan *chan,
+				  struct list_head *list)
 {
-	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
 	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node) {
+		list_del(&desc->node);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
+					  struct list_head *list)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe_reverse(desc, _desc, list, node) {
+		list_del(&desc->node);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	unsigned long flags;
 
-	dev_dbg(fsl_chan->dev, "Free all channel resources.\n");
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
-	list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
-#ifdef FSL_DMA_LD_DEBUG
-		dev_dbg(fsl_chan->dev,
-				"LD %p will be released.\n", desc);
-#endif
-		list_del(&desc->node);
-		/* free link descriptor */
-		dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
-	}
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-	dma_pool_destroy(fsl_chan->desc_pool);
+	dev_dbg(chan->dev, "Free all channel resources.\n");
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsldma_free_desc_list(chan, &chan->ld_pending);
+	fsldma_free_desc_list(chan, &chan->ld_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
-	fsl_chan->desc_pool = NULL;
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
 }
 
 static struct dma_async_tx_descriptor *
-fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
+fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
 {
-	struct fsl_dma_chan *fsl_chan;
+	struct fsldma_chan *chan;
 	struct fsl_desc_sw *new;
 
-	if (!chan)
+	if (!dchan)
 		return NULL;
 
-	fsl_chan = to_fsl_chan(chan);
+	chan = to_fsl_chan(dchan);
 
-	new = fsl_dma_alloc_descriptor(fsl_chan);
+	new = fsl_dma_alloc_descriptor(chan);
 	if (!new) {
-		dev_err(fsl_chan->dev, "No free memory for link descriptor\n");
+		dev_err(chan->dev, "No free memory for link descriptor\n");
 		return NULL;
 	}
 
@@ -473,51 +509,50 @@
 	list_add_tail(&new->node, &new->tx_list);
 
 	/* Set End-of-link to the last link descriptor of new list*/
-	set_ld_eol(fsl_chan, new);
+	set_ld_eol(chan, new);
 
 	return &new->async_tx;
 }
 
 static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
-	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	struct dma_chan *dchan, dma_addr_t dma_dst, dma_addr_t dma_src,
 	size_t len, unsigned long flags)
 {
-	struct fsl_dma_chan *fsl_chan;
+	struct fsldma_chan *chan;
 	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
-	struct list_head *list;
 	size_t copy;
 
-	if (!chan)
+	if (!dchan)
 		return NULL;
 
 	if (!len)
 		return NULL;
 
-	fsl_chan = to_fsl_chan(chan);
+	chan = to_fsl_chan(dchan);
 
 	do {
 
 		/* Allocate the link descriptor from DMA pool */
-		new = fsl_dma_alloc_descriptor(fsl_chan);
+		new = fsl_dma_alloc_descriptor(chan);
 		if (!new) {
-			dev_err(fsl_chan->dev,
+			dev_err(chan->dev,
 					"No free memory for link descriptor\n");
 			goto fail;
 		}
 #ifdef FSL_DMA_LD_DEBUG
-		dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+		dev_dbg(chan->dev, "new link desc alloc %p\n", new);
 #endif
 
 		copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
 
-		set_desc_cnt(fsl_chan, &new->hw, copy);
-		set_desc_src(fsl_chan, &new->hw, dma_src);
-		set_desc_dest(fsl_chan, &new->hw, dma_dest);
+		set_desc_cnt(chan, &new->hw, copy);
+		set_desc_src(chan, &new->hw, dma_src);
+		set_desc_dst(chan, &new->hw, dma_dst);
 
 		if (!first)
 			first = new;
 		else
-			set_desc_next(fsl_chan, &prev->hw, new->async_tx.phys);
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
 
 		new->async_tx.cookie = 0;
 		async_tx_ack(&new->async_tx);
@@ -525,7 +560,7 @@
 		prev = new;
 		len -= copy;
 		dma_src += copy;
-		dma_dest += copy;
+		dma_dst += copy;
 
 		/* Insert the link descriptor to the LD ring */
 		list_add_tail(&new->node, &first->tx_list);
@@ -535,7 +570,7 @@
 	new->async_tx.cookie = -EBUSY;
 
 	/* Set End-of-link to the last link descriptor of new list*/
-	set_ld_eol(fsl_chan, new);
+	set_ld_eol(chan, new);
 
 	return &first->async_tx;
 
@@ -543,12 +578,7 @@
 	if (!first)
 		return NULL;
 
-	list = &first->tx_list;
-	list_for_each_entry_safe_reverse(new, prev, list, node) {
-		list_del(&new->node);
-		dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
-	}
-
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
 	return NULL;
 }
 
@@ -565,13 +595,12 @@
  * chan->private variable.
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
-	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
 	enum dma_data_direction direction, unsigned long flags)
 {
-	struct fsl_dma_chan *fsl_chan;
+	struct fsldma_chan *chan;
 	struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
 	struct fsl_dma_slave *slave;
-	struct list_head *tx_list;
 	size_t copy;
 
 	int i;
@@ -581,14 +610,14 @@
 	struct fsl_dma_hw_addr *hw;
 	dma_addr_t dma_dst, dma_src;
 
-	if (!chan)
+	if (!dchan)
 		return NULL;
 
-	if (!chan->private)
+	if (!dchan->private)
 		return NULL;
 
-	fsl_chan = to_fsl_chan(chan);
-	slave = chan->private;
+	chan = to_fsl_chan(dchan);
+	slave = dchan->private;
 
 	if (list_empty(&slave->addresses))
 		return NULL;
@@ -637,14 +666,14 @@
 			}
 
 			/* Allocate the link descriptor from DMA pool */
-			new = fsl_dma_alloc_descriptor(fsl_chan);
+			new = fsl_dma_alloc_descriptor(chan);
 			if (!new) {
-				dev_err(fsl_chan->dev, "No free memory for "
+				dev_err(chan->dev, "No free memory for "
 						       "link descriptor\n");
 				goto fail;
 			}
 #ifdef FSL_DMA_LD_DEBUG
-			dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+			dev_dbg(chan->dev, "new link desc alloc %p\n", new);
 #endif
 
 			/*
@@ -671,9 +700,9 @@
 			}
 
 			/* Fill in the descriptor */
-			set_desc_cnt(fsl_chan, &new->hw, copy);
-			set_desc_src(fsl_chan, &new->hw, dma_src);
-			set_desc_dest(fsl_chan, &new->hw, dma_dst);
+			set_desc_cnt(chan, &new->hw, copy);
+			set_desc_src(chan, &new->hw, dma_src);
+			set_desc_dst(chan, &new->hw, dma_dst);
 
 			/*
 			 * If this is not the first descriptor, chain the
@@ -682,7 +711,7 @@
 			if (!first) {
 				first = new;
 			} else {
-				set_desc_next(fsl_chan, &prev->hw,
+				set_desc_next(chan, &prev->hw,
 					      new->async_tx.phys);
 			}
 
@@ -708,23 +737,23 @@
 	new->async_tx.cookie = -EBUSY;
 
 	/* Set End-of-link to the last link descriptor of new list */
-	set_ld_eol(fsl_chan, new);
+	set_ld_eol(chan, new);
 
 	/* Enable extra controller features */
-	if (fsl_chan->set_src_loop_size)
-		fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+	if (chan->set_src_loop_size)
+		chan->set_src_loop_size(chan, slave->src_loop_size);
 
-	if (fsl_chan->set_dest_loop_size)
-		fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+	if (chan->set_dst_loop_size)
+		chan->set_dst_loop_size(chan, slave->dst_loop_size);
 
-	if (fsl_chan->toggle_ext_start)
-		fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+	if (chan->toggle_ext_start)
+		chan->toggle_ext_start(chan, slave->external_start);
 
-	if (fsl_chan->toggle_ext_pause)
-		fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+	if (chan->toggle_ext_pause)
+		chan->toggle_ext_pause(chan, slave->external_pause);
 
-	if (fsl_chan->set_request_count)
-		fsl_chan->set_request_count(fsl_chan, slave->request_count);
+	if (chan->set_request_count)
+		chan->set_request_count(chan, slave->request_count);
 
 	return &first->async_tx;
 
@@ -741,215 +770,216 @@
 	 *
 	 * We're re-using variables for the loop, oh well
 	 */
-	tx_list = &first->tx_list;
-	list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
-		list_del_init(&new->node);
-		dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
-	}
-
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
 	return NULL;
 }
 
-static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+static void fsl_dma_device_terminate_all(struct dma_chan *dchan)
 {
-	struct fsl_dma_chan *fsl_chan;
-	struct fsl_desc_sw *desc, *tmp;
+	struct fsldma_chan *chan;
 	unsigned long flags;
 
-	if (!chan)
+	if (!dchan)
 		return;
 
-	fsl_chan = to_fsl_chan(chan);
+	chan = to_fsl_chan(dchan);
 
 	/* Halt the DMA engine */
-	dma_halt(fsl_chan);
+	dma_halt(chan);
 
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
 	/* Remove and free all of the descriptors in the LD queue */
-	list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
-		list_del(&desc->node);
-		dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
-	}
+	fsldma_free_desc_list(chan, &chan->ld_pending);
+	fsldma_free_desc_list(chan, &chan->ld_running);
 
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 }
 
 /**
  * fsl_dma_update_completed_cookie - Update the completed cookie.
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
+ *
+ * CONTEXT: hardirq
  */
-static void fsl_dma_update_completed_cookie(struct fsl_dma_chan *fsl_chan)
+static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan)
 {
-	struct fsl_desc_sw *cur_desc, *desc;
-	dma_addr_t ld_phy;
+	struct fsl_desc_sw *desc;
+	unsigned long flags;
+	dma_cookie_t cookie;
 
-	ld_phy = get_cdar(fsl_chan) & FSL_DMA_NLDA_MASK;
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	if (ld_phy) {
-		cur_desc = NULL;
-		list_for_each_entry(desc, &fsl_chan->ld_queue, node)
-			if (desc->async_tx.phys == ld_phy) {
-				cur_desc = desc;
-				break;
-			}
-
-		if (cur_desc && cur_desc->async_tx.cookie) {
-			if (dma_is_idle(fsl_chan))
-				fsl_chan->completed_cookie =
-					cur_desc->async_tx.cookie;
-			else
-				fsl_chan->completed_cookie =
-					cur_desc->async_tx.cookie - 1;
-		}
+	if (list_empty(&chan->ld_running)) {
+		dev_dbg(chan->dev, "no running descriptors\n");
+		goto out_unlock;
 	}
+
+	/* Get the last descriptor, update the cookie to that */
+	desc = to_fsl_desc(chan->ld_running.prev);
+	if (dma_is_idle(chan))
+		cookie = desc->async_tx.cookie;
+	else {
+		cookie = desc->async_tx.cookie - 1;
+		if (unlikely(cookie < DMA_MIN_COOKIE))
+			cookie = DMA_MAX_COOKIE;
+	}
+
+	chan->completed_cookie = cookie;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/**
+ * fsldma_desc_status - Check the status of a descriptor
+ * @chan: Freescale DMA channel
+ * @desc: DMA SW descriptor
+ *
+ * This function will return the status of the given descriptor
+ */
+static enum dma_status fsldma_desc_status(struct fsldma_chan *chan,
+					  struct fsl_desc_sw *desc)
+{
+	return dma_async_is_complete(desc->async_tx.cookie,
+				     chan->completed_cookie,
+				     chan->common.cookie);
 }
 
 /**
  * fsl_chan_ld_cleanup - Clean up link descriptors
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  *
  * This function clean up the ld_queue of DMA channel.
- * If 'in_intr' is set, the function will move the link descriptor to
- * the recycle list. Otherwise, free it directly.
  */
-static void fsl_chan_ld_cleanup(struct fsl_dma_chan *fsl_chan)
+static void fsl_chan_ld_cleanup(struct fsldma_chan *chan)
 {
 	struct fsl_desc_sw *desc, *_desc;
 	unsigned long flags;
 
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	dev_dbg(fsl_chan->dev, "chan completed_cookie = %d\n",
-			fsl_chan->completed_cookie);
-	list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+	dev_dbg(chan->dev, "chan completed_cookie = %d\n", chan->completed_cookie);
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
 		dma_async_tx_callback callback;
 		void *callback_param;
 
-		if (dma_async_is_complete(desc->async_tx.cookie,
-			    fsl_chan->completed_cookie, fsl_chan->common.cookie)
-				== DMA_IN_PROGRESS)
+		if (fsldma_desc_status(chan, desc) == DMA_IN_PROGRESS)
 			break;
 
-		callback = desc->async_tx.callback;
-		callback_param = desc->async_tx.callback_param;
-
-		/* Remove from ld_queue list */
+		/* Remove from the list of running transactions */
 		list_del(&desc->node);
 
-		dev_dbg(fsl_chan->dev, "link descriptor %p will be recycle.\n",
-				desc);
-		dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
-
 		/* Run the link descriptor callback function */
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
 		if (callback) {
-			spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-			dev_dbg(fsl_chan->dev, "link descriptor %p callback\n",
-					desc);
+			spin_unlock_irqrestore(&chan->desc_lock, flags);
+			dev_dbg(chan->dev, "LD %p callback\n", desc);
 			callback(callback_param);
-			spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+			spin_lock_irqsave(&chan->desc_lock, flags);
 		}
+
+		/* Run any dependencies, then free the descriptor */
+		dma_run_dependencies(&desc->async_tx);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
 	}
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 }
 
 /**
- * fsl_chan_xfer_ld_queue - Transfer link descriptors in channel ld_queue.
- * @fsl_chan : Freescale DMA channel
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * This will make sure that any pending transactions will be run.
+ * If the DMA controller is idle, it will be started. Otherwise,
+ * the DMA controller's interrupt handler will start any pending
+ * transactions when it becomes idle.
  */
-static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
 {
-	struct list_head *ld_node;
-	dma_addr_t next_dest_addr;
+	struct fsl_desc_sw *desc;
 	unsigned long flags;
 
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	if (!dma_is_idle(fsl_chan))
-		goto out_unlock;
-
-	dma_halt(fsl_chan);
-
-	/* If there are some link descriptors
-	 * not transfered in queue. We need to start it.
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
 	 */
-
-	/* Find the first un-transfer desciptor */
-	for (ld_node = fsl_chan->ld_queue.next;
-		(ld_node != &fsl_chan->ld_queue)
-			&& (dma_async_is_complete(
-				to_fsl_desc(ld_node)->async_tx.cookie,
-				fsl_chan->completed_cookie,
-				fsl_chan->common.cookie) == DMA_SUCCESS);
-		ld_node = ld_node->next);
-
-	if (ld_node != &fsl_chan->ld_queue) {
-		/* Get the ld start address from ld_queue */
-		next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
-		dev_dbg(fsl_chan->dev, "xfer LDs staring from 0x%llx\n",
-				(unsigned long long)next_dest_addr);
-		set_cdar(fsl_chan, next_dest_addr);
-		dma_start(fsl_chan);
-	} else {
-		set_cdar(fsl_chan, 0);
-		set_ndar(fsl_chan, 0);
+	if (list_empty(&chan->ld_pending)) {
+		dev_dbg(chan->dev, "no pending LDs\n");
+		goto out_unlock;
 	}
 
+	/*
+	 * The DMA controller is not idle, which means the interrupt
+	 * handler will start any queued transactions when it runs
+	 * at the end of the current transaction
+	 */
+	if (!dma_is_idle(chan)) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		goto out_unlock;
+	}
+
+	/*
+	 * TODO:
+	 * make sure the dma_halt() function really un-wedges the
+	 * controller as much as possible
+	 */
+	dma_halt(chan);
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	dma_start(chan);
+
 out_unlock:
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 }
 
 /**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  */
-static void fsl_dma_memcpy_issue_pending(struct dma_chan *chan)
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
 {
-	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
-
-#ifdef FSL_DMA_LD_DEBUG
-	struct fsl_desc_sw *ld;
-	unsigned long flags;
-
-	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
-	if (list_empty(&fsl_chan->ld_queue)) {
-		spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-		return;
-	}
-
-	dev_dbg(fsl_chan->dev, "--memcpy issue--\n");
-	list_for_each_entry(ld, &fsl_chan->ld_queue, node) {
-		int i;
-		dev_dbg(fsl_chan->dev, "Ch %d, LD %08x\n",
-				fsl_chan->id, ld->async_tx.phys);
-		for (i = 0; i < 8; i++)
-			dev_dbg(fsl_chan->dev, "LD offset %d: %08x\n",
-					i, *(((u32 *)&ld->hw) + i));
-	}
-	dev_dbg(fsl_chan->dev, "----------------\n");
-	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
-#endif
-
-	fsl_chan_xfer_ld_queue(fsl_chan);
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	fsl_chan_xfer_ld_queue(chan);
 }
 
 /**
  * fsl_dma_is_complete - Determine the DMA status
- * @fsl_chan : Freescale DMA channel
+ * @chan : Freescale DMA channel
  */
-static enum dma_status fsl_dma_is_complete(struct dma_chan *chan,
+static enum dma_status fsl_dma_is_complete(struct dma_chan *dchan,
 					dma_cookie_t cookie,
 					dma_cookie_t *done,
 					dma_cookie_t *used)
 {
-	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	dma_cookie_t last_used;
 	dma_cookie_t last_complete;
 
-	fsl_chan_ld_cleanup(fsl_chan);
+	fsl_chan_ld_cleanup(chan);
 
-	last_used = chan->cookie;
-	last_complete = fsl_chan->completed_cookie;
+	last_used = dchan->cookie;
+	last_complete = chan->completed_cookie;
 
 	if (done)
 		*done = last_complete;
@@ -960,32 +990,37 @@
 	return dma_async_is_complete(cookie, last_complete, last_used);
 }
 
-static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
+/*----------------------------------------------------------------------------*/
+/* Interrupt Handling                                                         */
+/*----------------------------------------------------------------------------*/
+
+static irqreturn_t fsldma_chan_irq(int irq, void *data)
 {
-	struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
-	u32 stat;
+	struct fsldma_chan *chan = data;
 	int update_cookie = 0;
 	int xfer_ld_q = 0;
+	u32 stat;
 
-	stat = get_sr(fsl_chan);
-	dev_dbg(fsl_chan->dev, "event: channel %d, stat = 0x%x\n",
-						fsl_chan->id, stat);
-	set_sr(fsl_chan, stat);		/* Clear the event register */
+	/* save and clear the status register */
+	stat = get_sr(chan);
+	set_sr(chan, stat);
+	dev_dbg(chan->dev, "irq: channel %d, stat = 0x%x\n", chan->id, stat);
 
 	stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
 	if (!stat)
 		return IRQ_NONE;
 
 	if (stat & FSL_DMA_SR_TE)
-		dev_err(fsl_chan->dev, "Transfer Error!\n");
+		dev_err(chan->dev, "Transfer Error!\n");
 
-	/* Programming Error
+	/*
+	 * Programming Error
 	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
 	 * triger a PE interrupt.
 	 */
 	if (stat & FSL_DMA_SR_PE) {
-		dev_dbg(fsl_chan->dev, "event: Programming Error INT\n");
-		if (get_bcr(fsl_chan) == 0) {
+		dev_dbg(chan->dev, "irq: Programming Error INT\n");
+		if (get_bcr(chan) == 0) {
 			/* BCR register is 0, this is a DMA_INTERRUPT async_tx.
 			 * Now, update the completed cookie, and continue the
 			 * next uncompleted transfer.
@@ -996,208 +1031,296 @@
 		stat &= ~FSL_DMA_SR_PE;
 	}
 
-	/* If the link descriptor segment transfer finishes,
+	/*
+	 * If the link descriptor segment transfer finishes,
 	 * we will recycle the used descriptor.
 	 */
 	if (stat & FSL_DMA_SR_EOSI) {
-		dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
-		dev_dbg(fsl_chan->dev, "event: clndar 0x%llx, nlndar 0x%llx\n",
-			(unsigned long long)get_cdar(fsl_chan),
-			(unsigned long long)get_ndar(fsl_chan));
+		dev_dbg(chan->dev, "irq: End-of-segments INT\n");
+		dev_dbg(chan->dev, "irq: clndar 0x%llx, nlndar 0x%llx\n",
+			(unsigned long long)get_cdar(chan),
+			(unsigned long long)get_ndar(chan));
 		stat &= ~FSL_DMA_SR_EOSI;
 		update_cookie = 1;
 	}
 
-	/* For MPC8349, EOCDI event need to update cookie
+	/*
+	 * For MPC8349, EOCDI event need to update cookie
 	 * and start the next transfer if it exist.
 	 */
 	if (stat & FSL_DMA_SR_EOCDI) {
-		dev_dbg(fsl_chan->dev, "event: End-of-Chain link INT\n");
+		dev_dbg(chan->dev, "irq: End-of-Chain link INT\n");
 		stat &= ~FSL_DMA_SR_EOCDI;
 		update_cookie = 1;
 		xfer_ld_q = 1;
 	}
 
-	/* If it current transfer is the end-of-transfer,
+	/*
+	 * If it current transfer is the end-of-transfer,
 	 * we should clear the Channel Start bit for
 	 * prepare next transfer.
 	 */
 	if (stat & FSL_DMA_SR_EOLNI) {
-		dev_dbg(fsl_chan->dev, "event: End-of-link INT\n");
+		dev_dbg(chan->dev, "irq: End-of-link INT\n");
 		stat &= ~FSL_DMA_SR_EOLNI;
 		xfer_ld_q = 1;
 	}
 
 	if (update_cookie)
-		fsl_dma_update_completed_cookie(fsl_chan);
+		fsl_dma_update_completed_cookie(chan);
 	if (xfer_ld_q)
-		fsl_chan_xfer_ld_queue(fsl_chan);
+		fsl_chan_xfer_ld_queue(chan);
 	if (stat)
-		dev_dbg(fsl_chan->dev, "event: unhandled sr 0x%02x\n",
-					stat);
+		dev_dbg(chan->dev, "irq: unhandled sr 0x%02x\n", stat);
 
-	dev_dbg(fsl_chan->dev, "event: Exit\n");
-	tasklet_schedule(&fsl_chan->tasklet);
+	dev_dbg(chan->dev, "irq: Exit\n");
+	tasklet_schedule(&chan->tasklet);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t fsl_dma_do_interrupt(int irq, void *data)
-{
-	struct fsl_dma_device *fdev = (struct fsl_dma_device *)data;
-	u32 gsr;
-	int ch_nr;
-
-	gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->reg_base)
-			: in_le32(fdev->reg_base);
-	ch_nr = (32 - ffs(gsr)) / 8;
-
-	return fdev->chan[ch_nr] ? fsl_dma_chan_do_interrupt(irq,
-			fdev->chan[ch_nr]) : IRQ_NONE;
-}
-
 static void dma_do_tasklet(unsigned long data)
 {
-	struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
-	fsl_chan_ld_cleanup(fsl_chan);
+	struct fsldma_chan *chan = (struct fsldma_chan *)data;
+	fsl_chan_ld_cleanup(chan);
 }
 
-static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
-	struct device_node *node, u32 feature, const char *compatible)
+static irqreturn_t fsldma_ctrl_irq(int irq, void *data)
 {
-	struct fsl_dma_chan *new_fsl_chan;
-	int err;
+	struct fsldma_device *fdev = data;
+	struct fsldma_chan *chan;
+	unsigned int handled = 0;
+	u32 gsr, mask;
+	int i;
 
-	/* alloc channel */
-	new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
-	if (!new_fsl_chan) {
-		dev_err(fdev->dev, "No free memory for allocating "
-				"dma channels!\n");
-		return -ENOMEM;
+	gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs)
+						   : in_le32(fdev->regs);
+	mask = 0xff000000;
+	dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (gsr & mask) {
+			dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id);
+			fsldma_chan_irq(irq, chan);
+			handled++;
+		}
+
+		gsr &= ~mask;
+		mask >>= 8;
 	}
 
-	/* get dma channel register base */
-	err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
-	if (err) {
-		dev_err(fdev->dev, "Can't get %s property 'reg'\n",
-				node->full_name);
-		goto err_no_reg;
+	return IRQ_RETVAL(handled);
+}
+
+static void fsldma_free_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int i;
+
+	if (fdev->irq != NO_IRQ) {
+		dev_dbg(fdev->dev, "free per-controller IRQ\n");
+		free_irq(fdev->irq, fdev);
+		return;
 	}
 
-	new_fsl_chan->feature = feature;
-
-	if (!fdev->feature)
-		fdev->feature = new_fsl_chan->feature;
-
-	/* If the DMA device's feature is different than its channels',
-	 * report the bug.
-	 */
-	WARN_ON(fdev->feature != new_fsl_chan->feature);
-
-	new_fsl_chan->dev = fdev->dev;
-	new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
-			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
-
-	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
-	if (new_fsl_chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
-		dev_err(fdev->dev, "There is no %d channel!\n",
-				new_fsl_chan->id);
-		err = -EINVAL;
-		goto err_no_chan;
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (chan && chan->irq != NO_IRQ) {
+			dev_dbg(fdev->dev, "free channel %d IRQ\n", chan->id);
+			free_irq(chan->irq, chan);
+		}
 	}
-	fdev->chan[new_fsl_chan->id] = new_fsl_chan;
-	tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
-			(unsigned long)new_fsl_chan);
+}
 
-	/* Init the channel */
-	dma_init(new_fsl_chan);
+static int fsldma_request_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int ret;
+	int i;
 
-	/* Clear cdar registers */
-	set_cdar(new_fsl_chan, 0);
-
-	switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) {
-	case FSL_DMA_IP_85XX:
-		new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
-	case FSL_DMA_IP_83XX:
-		new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
-		new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
-		new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
-		new_fsl_chan->set_request_count = fsl_chan_set_request_count;
+	/* if we have a per-controller IRQ, use that */
+	if (fdev->irq != NO_IRQ) {
+		dev_dbg(fdev->dev, "request per-controller IRQ\n");
+		ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED,
+				  "fsldma-controller", fdev);
+		return ret;
 	}
 
-	spin_lock_init(&new_fsl_chan->desc_lock);
-	INIT_LIST_HEAD(&new_fsl_chan->ld_queue);
+	/* no per-controller IRQ, use the per-channel IRQs */
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
 
-	new_fsl_chan->common.device = &fdev->common;
+		if (chan->irq == NO_IRQ) {
+			dev_err(fdev->dev, "no interrupts property defined for "
+					   "DMA channel %d. Please fix your "
+					   "device tree\n", chan->id);
+			ret = -ENODEV;
+			goto out_unwind;
+		}
 
-	/* Add the channel to DMA device channel list */
-	list_add_tail(&new_fsl_chan->common.device_node,
-			&fdev->common.channels);
-	fdev->common.chancnt++;
-
-	new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
-	if (new_fsl_chan->irq != NO_IRQ) {
-		err = request_irq(new_fsl_chan->irq,
-					&fsl_dma_chan_do_interrupt, IRQF_SHARED,
-					"fsldma-channel", new_fsl_chan);
-		if (err) {
-			dev_err(fdev->dev, "DMA channel %s request_irq error "
-				"with return %d\n", node->full_name, err);
-			goto err_no_irq;
+		dev_dbg(fdev->dev, "request channel %d IRQ\n", chan->id);
+		ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED,
+				  "fsldma-chan", chan);
+		if (ret) {
+			dev_err(fdev->dev, "unable to request IRQ for DMA "
+					   "channel %d\n", chan->id);
+			goto out_unwind;
 		}
 	}
 
-	dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
-		 compatible,
-		 new_fsl_chan->irq != NO_IRQ ? new_fsl_chan->irq : fdev->irq);
+	return 0;
+
+out_unwind:
+	for (/* none */; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (chan->irq == NO_IRQ)
+			continue;
+
+		free_irq(chan->irq, chan);
+	}
+
+	return ret;
+}
+
+/*----------------------------------------------------------------------------*/
+/* OpenFirmware Subsystem                                                     */
+/*----------------------------------------------------------------------------*/
+
+static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
+{
+	struct fsldma_chan *chan;
+	struct resource res;
+	int err;
+
+	/* alloc channel */
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan) {
+		dev_err(fdev->dev, "no free memory for DMA channels!\n");
+		err = -ENOMEM;
+		goto out_return;
+	}
+
+	/* ioremap registers for use */
+	chan->regs = of_iomap(node, 0);
+	if (!chan->regs) {
+		dev_err(fdev->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free_chan;
+	}
+
+	err = of_address_to_resource(node, 0, &res);
+	if (err) {
+		dev_err(fdev->dev, "unable to find 'reg' property\n");
+		goto out_iounmap_regs;
+	}
+
+	chan->feature = feature;
+	if (!fdev->feature)
+		fdev->feature = chan->feature;
+
+	/*
+	 * If the DMA device's feature is different than the feature
+	 * of its channels, report the bug
+	 */
+	WARN_ON(fdev->feature != chan->feature);
+
+	chan->dev = fdev->dev;
+	chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+	if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
+		dev_err(fdev->dev, "too many channels for device\n");
+		err = -EINVAL;
+		goto out_iounmap_regs;
+	}
+
+	fdev->chan[chan->id] = chan;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+
+	/* Initialize the channel */
+	dma_init(chan);
+
+	/* Clear cdar registers */
+	set_cdar(chan, 0);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+	case FSL_DMA_IP_83XX:
+		chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+		chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+		chan->set_dst_loop_size = fsl_chan_set_dst_loop_size;
+		chan->set_request_count = fsl_chan_set_request_count;
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	INIT_LIST_HEAD(&chan->ld_pending);
+	INIT_LIST_HEAD(&chan->ld_running);
+
+	chan->common.device = &fdev->common;
+
+	/* find the IRQ line, if it exists in the device tree */
+	chan->irq = irq_of_parse_and_map(node, 0);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->common.device_node, &fdev->common.channels);
+	fdev->common.chancnt++;
+
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
+		 chan->irq != NO_IRQ ? chan->irq : fdev->irq);
 
 	return 0;
 
-err_no_irq:
-	list_del(&new_fsl_chan->common.device_node);
-err_no_chan:
-	iounmap(new_fsl_chan->reg_base);
-err_no_reg:
-	kfree(new_fsl_chan);
+out_iounmap_regs:
+	iounmap(chan->regs);
+out_free_chan:
+	kfree(chan);
+out_return:
 	return err;
 }
 
-static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
+static void fsl_dma_chan_remove(struct fsldma_chan *chan)
 {
-	if (fchan->irq != NO_IRQ)
-		free_irq(fchan->irq, fchan);
-	list_del(&fchan->common.device_node);
-	iounmap(fchan->reg_base);
-	kfree(fchan);
+	irq_dispose_mapping(chan->irq);
+	list_del(&chan->common.device_node);
+	iounmap(chan->regs);
+	kfree(chan);
 }
 
-static int __devinit of_fsl_dma_probe(struct of_device *dev,
+static int __devinit fsldma_of_probe(struct of_device *op,
 			const struct of_device_id *match)
 {
-	int err;
-	struct fsl_dma_device *fdev;
+	struct fsldma_device *fdev;
 	struct device_node *child;
+	int err;
 
-	fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
+	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
 	if (!fdev) {
-		dev_err(&dev->dev, "No enough memory for 'priv'\n");
-		return -ENOMEM;
+		dev_err(&op->dev, "No enough memory for 'priv'\n");
+		err = -ENOMEM;
+		goto out_return;
 	}
-	fdev->dev = &dev->dev;
+
+	fdev->dev = &op->dev;
 	INIT_LIST_HEAD(&fdev->common.channels);
 
-	/* get DMA controller register base */
-	err = of_address_to_resource(dev->node, 0, &fdev->reg);
-	if (err) {
-		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
-				dev->node->full_name);
-		goto err_no_reg;
+	/* ioremap the registers for use */
+	fdev->regs = of_iomap(op->node, 0);
+	if (!fdev->regs) {
+		dev_err(&op->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free_fdev;
 	}
 
-	dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
-			"controller at 0x%llx...\n",
-			match->compatible, (unsigned long long)fdev->reg.start);
-	fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
-						- fdev->reg.start + 1);
+	/* map the channel IRQ if it exists, but don't hookup the handler yet */
+	fdev->irq = irq_of_parse_and_map(op->node, 0);
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
@@ -1210,103 +1333,111 @@
 	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
 	fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
 	fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
-	fdev->common.dev = &dev->dev;
+	fdev->common.dev = &op->dev;
 
-	fdev->irq = irq_of_parse_and_map(dev->node, 0);
-	if (fdev->irq != NO_IRQ) {
-		err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
-					"fsldma-device", fdev);
-		if (err) {
-			dev_err(&dev->dev, "DMA device request_irq error "
-				"with return %d\n", err);
-			goto err;
-		}
-	}
+	dev_set_drvdata(&op->dev, fdev);
 
-	dev_set_drvdata(&(dev->dev), fdev);
-
-	/* We cannot use of_platform_bus_probe() because there is no
-	 * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+	/*
+	 * We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove(). Instead, we manually instantiate every DMA
 	 * channel object.
 	 */
-	for_each_child_of_node(dev->node, child) {
-		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+	for_each_child_of_node(op->node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) {
 			fsl_dma_chan_probe(fdev, child,
 				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
 				"fsl,eloplus-dma-channel");
-		if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+		}
+
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel")) {
 			fsl_dma_chan_probe(fdev, child,
 				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
 				"fsl,elo-dma-channel");
+		}
+	}
+
+	/*
+	 * Hookup the IRQ handler(s)
+	 *
+	 * If we have a per-controller interrupt, we prefer that to the
+	 * per-channel interrupts to reduce the number of shared interrupt
+	 * handlers on the same IRQ line
+	 */
+	err = fsldma_request_irqs(fdev);
+	if (err) {
+		dev_err(fdev->dev, "unable to request IRQs\n");
+		goto out_free_fdev;
 	}
 
 	dma_async_device_register(&fdev->common);
 	return 0;
 
-err:
-	iounmap(fdev->reg_base);
-err_no_reg:
+out_free_fdev:
+	irq_dispose_mapping(fdev->irq);
 	kfree(fdev);
+out_return:
 	return err;
 }
 
-static int of_fsl_dma_remove(struct of_device *of_dev)
+static int fsldma_of_remove(struct of_device *op)
 {
-	struct fsl_dma_device *fdev;
+	struct fsldma_device *fdev;
 	unsigned int i;
 
-	fdev = dev_get_drvdata(&of_dev->dev);
-
+	fdev = dev_get_drvdata(&op->dev);
 	dma_async_device_unregister(&fdev->common);
 
-	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+	fsldma_free_irqs(fdev);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
 		if (fdev->chan[i])
 			fsl_dma_chan_remove(fdev->chan[i]);
+	}
 
-	if (fdev->irq != NO_IRQ)
-		free_irq(fdev->irq, fdev);
-
-	iounmap(fdev->reg_base);
-
+	iounmap(fdev->regs);
+	dev_set_drvdata(&op->dev, NULL);
 	kfree(fdev);
-	dev_set_drvdata(&of_dev->dev, NULL);
 
 	return 0;
 }
 
-static struct of_device_id of_fsl_dma_ids[] = {
+static const struct of_device_id fsldma_of_ids[] = {
 	{ .compatible = "fsl,eloplus-dma", },
 	{ .compatible = "fsl,elo-dma", },
 	{}
 };
 
-static struct of_platform_driver of_fsl_dma_driver = {
-	.name = "fsl-elo-dma",
-	.match_table = of_fsl_dma_ids,
-	.probe = of_fsl_dma_probe,
-	.remove = of_fsl_dma_remove,
+static struct of_platform_driver fsldma_of_driver = {
+	.name		= "fsl-elo-dma",
+	.match_table	= fsldma_of_ids,
+	.probe		= fsldma_of_probe,
+	.remove		= fsldma_of_remove,
 };
 
-static __init int of_fsl_dma_init(void)
+/*----------------------------------------------------------------------------*/
+/* Module Init / Exit                                                         */
+/*----------------------------------------------------------------------------*/
+
+static __init int fsldma_init(void)
 {
 	int ret;
 
 	pr_info("Freescale Elo / Elo Plus DMA driver\n");
 
-	ret = of_register_platform_driver(&of_fsl_dma_driver);
+	ret = of_register_platform_driver(&fsldma_of_driver);
 	if (ret)
 		pr_err("fsldma: failed to register platform driver\n");
 
 	return ret;
 }
 
-static void __exit of_fsl_dma_exit(void)
+static void __exit fsldma_exit(void)
 {
-	of_unregister_platform_driver(&of_fsl_dma_driver);
+	of_unregister_platform_driver(&fsldma_of_driver);
 }
 
-subsys_initcall(of_fsl_dma_init);
-module_exit(of_fsl_dma_exit);
+subsys_initcall(fsldma_init);
+module_exit(fsldma_exit);
 
 MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 0df14cb..cb4d6ff 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -92,11 +92,9 @@
 	struct list_head node;
 	struct list_head tx_list;
 	struct dma_async_tx_descriptor async_tx;
-	struct list_head *ld;
-	void *priv;
 } __attribute__((aligned(32)));
 
-struct fsl_dma_chan_regs {
+struct fsldma_chan_regs {
 	u32 mr;	/* 0x00 - Mode Register */
 	u32 sr;	/* 0x04 - Status Register */
 	u64 cdar;	/* 0x08 - Current descriptor address register */
@@ -106,20 +104,19 @@
 	u64 ndar;	/* 0x24 - Next Descriptor Address Register */
 };
 
-struct fsl_dma_chan;
+struct fsldma_chan;
 #define FSL_DMA_MAX_CHANS_PER_DEVICE 4
 
-struct fsl_dma_device {
-	void __iomem *reg_base;	/* DGSR register base */
-	struct resource reg;	/* Resource for register */
+struct fsldma_device {
+	void __iomem *regs;	/* DGSR register base */
 	struct device *dev;
 	struct dma_device common;
-	struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+	struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
 	u32 feature;		/* The same as DMA channels */
 	int irq;		/* Channel IRQ */
 };
 
-/* Define macros for fsl_dma_chan->feature property */
+/* Define macros for fsldma_chan->feature property */
 #define FSL_DMA_LITTLE_ENDIAN	0x00000000
 #define FSL_DMA_BIG_ENDIAN	0x00000001
 
@@ -130,28 +127,28 @@
 #define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
 #define FSL_DMA_CHAN_START_EXT	0x00002000
 
-struct fsl_dma_chan {
-	struct fsl_dma_chan_regs __iomem *reg_base;
+struct fsldma_chan {
+	struct fsldma_chan_regs __iomem *regs;
 	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
 	spinlock_t desc_lock;		/* Descriptor operation lock */
-	struct list_head ld_queue;	/* Link descriptors queue */
+	struct list_head ld_pending;	/* Link descriptors queue */
+	struct list_head ld_running;	/* Link descriptors queue */
 	struct dma_chan common;		/* DMA common channel */
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 	struct device *dev;		/* Channel device */
-	struct resource reg;		/* Resource for register */
 	int irq;			/* Channel IRQ */
 	int id;				/* Raw id of this channel */
 	struct tasklet_struct tasklet;
 	u32 feature;
 
-	void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
-	void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
-	void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
-	void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
-	void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
+	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
+	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
+	void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_request_count)(struct fsldma_chan *fsl_chan, int size);
 };
 
-#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
+#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common)
 #define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
 #define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
 
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index dcc4ab7..5d0e42b 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -94,16 +94,12 @@
 	return IRQ_HANDLED;
 }
 
-static void ioat1_cleanup_tasklet(unsigned long data);
-
 /* common channel initialization */
-void ioat_init_channel(struct ioatdma_device *device,
-		       struct ioat_chan_common *chan, int idx,
-		       void (*timer_fn)(unsigned long),
-		       void (*tasklet)(unsigned long),
-		       unsigned long ioat)
+void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx)
 {
 	struct dma_device *dma = &device->common;
+	struct dma_chan *c = &chan->common;
+	unsigned long data = (unsigned long) c;
 
 	chan->device = device;
 	chan->reg_base = device->reg_base + (0x80 * (idx + 1));
@@ -112,14 +108,12 @@
 	list_add_tail(&chan->common.device_node, &dma->channels);
 	device->idx[idx] = chan;
 	init_timer(&chan->timer);
-	chan->timer.function = timer_fn;
-	chan->timer.data = ioat;
-	tasklet_init(&chan->cleanup_task, tasklet, ioat);
+	chan->timer.function = device->timer_fn;
+	chan->timer.data = data;
+	tasklet_init(&chan->cleanup_task, device->cleanup_fn, data);
 	tasklet_disable(&chan->cleanup_task);
 }
 
-static void ioat1_timer_event(unsigned long data);
-
 /**
  * ioat1_dma_enumerate_channels - find and initialize the device's channels
  * @device: the device to be enumerated
@@ -155,10 +149,7 @@
 		if (!ioat)
 			break;
 
-		ioat_init_channel(device, &ioat->base, i,
-				  ioat1_timer_event,
-				  ioat1_cleanup_tasklet,
-				  (unsigned long) ioat);
+		ioat_init_channel(device, &ioat->base, i);
 		ioat->xfercap = xfercap;
 		spin_lock_init(&ioat->desc_lock);
 		INIT_LIST_HEAD(&ioat->free_desc);
@@ -532,12 +523,12 @@
 	return &desc->txd;
 }
 
-static void ioat1_cleanup_tasklet(unsigned long data)
+static void ioat1_cleanup_event(unsigned long data)
 {
-	struct ioat_dma_chan *chan = (void *)data;
+	struct ioat_dma_chan *ioat = to_ioat_chan((void *) data);
 
-	ioat1_cleanup(chan);
-	writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
+	ioat1_cleanup(ioat);
+	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
 void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
@@ -687,7 +678,7 @@
 
 static void ioat1_timer_event(unsigned long data)
 {
-	struct ioat_dma_chan *ioat = (void *) data;
+	struct ioat_dma_chan *ioat = to_ioat_chan((void *) data);
 	struct ioat_chan_common *chan = &ioat->base;
 
 	dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
@@ -734,16 +725,17 @@
 	spin_unlock_bh(&chan->cleanup_lock);
 }
 
-static enum dma_status
-ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+enum dma_status
+ioat_is_dma_complete(struct dma_chan *c, dma_cookie_t cookie,
 		      dma_cookie_t *done, dma_cookie_t *used)
 {
-	struct ioat_dma_chan *ioat = to_ioat_chan(c);
+	struct ioat_chan_common *chan = to_chan_common(c);
+	struct ioatdma_device *device = chan->device;
 
 	if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
 		return DMA_SUCCESS;
 
-	ioat1_cleanup(ioat);
+	device->cleanup_fn((unsigned long) c);
 
 	return ioat_is_complete(c, cookie, done, used);
 }
@@ -1199,12 +1191,14 @@
 	device->intr_quirk = ioat1_intr_quirk;
 	device->enumerate_channels = ioat1_enumerate_channels;
 	device->self_test = ioat_dma_self_test;
+	device->timer_fn = ioat1_timer_event;
+	device->cleanup_fn = ioat1_cleanup_event;
 	dma = &device->common;
 	dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
 	dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
 	dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
-	dma->device_is_tx_complete = ioat1_dma_is_complete;
+	dma->device_is_tx_complete = ioat_is_dma_complete;
 
 	err = ioat_probe(device);
 	if (err)
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index bbc3e78..4f747a2 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -61,7 +61,7 @@
  * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
  * @enumerate_channels: hw version specific channel enumeration
  * @reset_hw: hw version specific channel (re)initialization
- * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @cleanup_fn: select between the v2 and v3 cleanup routines
  * @timer_fn: select between the v2 and v3 timer watchdog routines
  * @self_test: hardware version specific self test for each supported op type
  *
@@ -80,7 +80,7 @@
 	void (*intr_quirk)(struct ioatdma_device *device);
 	int (*enumerate_channels)(struct ioatdma_device *device);
 	int (*reset_hw)(struct ioat_chan_common *chan);
-	void (*cleanup_tasklet)(unsigned long data);
+	void (*cleanup_fn)(unsigned long data);
 	void (*timer_fn)(unsigned long data);
 	int (*self_test)(struct ioatdma_device *device);
 };
@@ -337,10 +337,9 @@
 					      void __iomem *iobase);
 unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
 void ioat_init_channel(struct ioatdma_device *device,
-		       struct ioat_chan_common *chan, int idx,
-		       void (*timer_fn)(unsigned long),
-		       void (*tasklet)(unsigned long),
-		       unsigned long ioat);
+		       struct ioat_chan_common *chan, int idx);
+enum dma_status ioat_is_dma_complete(struct dma_chan *c, dma_cookie_t cookie,
+				     dma_cookie_t *done, dma_cookie_t *used);
 void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
 		    size_t len, struct ioat_dma_descriptor *hw);
 bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 5cc37af..1ed5d66 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -51,48 +51,40 @@
 
 void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
 {
-	void * __iomem reg_base = ioat->base.reg_base;
+	struct ioat_chan_common *chan = &ioat->base;
 
-	ioat->pending = 0;
 	ioat->dmacount += ioat2_ring_pending(ioat);
 	ioat->issued = ioat->head;
 	/* make descriptor updates globally visible before notifying channel */
 	wmb();
-	writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-	dev_dbg(to_dev(&ioat->base),
+	writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+	dev_dbg(to_dev(chan),
 		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
 		__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
 }
 
-void ioat2_issue_pending(struct dma_chan *chan)
+void ioat2_issue_pending(struct dma_chan *c)
 {
-	struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
 
-	spin_lock_bh(&ioat->ring_lock);
-	if (ioat->pending == 1)
+	if (ioat2_ring_pending(ioat)) {
+		spin_lock_bh(&ioat->ring_lock);
 		__ioat2_issue_pending(ioat);
-	spin_unlock_bh(&ioat->ring_lock);
+		spin_unlock_bh(&ioat->ring_lock);
+	}
 }
 
 /**
  * ioat2_update_pending - log pending descriptors
  * @ioat: ioat2+ channel
  *
- * set pending to '1' unless pending is already set to '2', pending == 2
- * indicates that submission is temporarily blocked due to an in-flight
- * reset.  If we are already above the ioat_pending_level threshold then
- * just issue pending.
- *
- * called with ring_lock held
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark.  Called with ring_lock held
  */
 static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
 {
-	if (unlikely(ioat->pending == 2))
-		return;
-	else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+	if (ioat2_ring_pending(ioat) > ioat_pending_level)
 		__ioat2_issue_pending(ioat);
-	else
-		ioat->pending = 1;
 }
 
 static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
@@ -166,7 +158,7 @@
 			seen_current = true;
 	}
 	ioat->tail += i;
-	BUG_ON(!seen_current); /* no active descs have written a completion? */
+	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 
 	chan->last_completion = phys_complete;
 	if (ioat->head == ioat->tail) {
@@ -207,9 +199,9 @@
 	spin_unlock_bh(&chan->cleanup_lock);
 }
 
-void ioat2_cleanup_tasklet(unsigned long data)
+void ioat2_cleanup_event(unsigned long data)
 {
-	struct ioat2_dma_chan *ioat = (void *) data;
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 
 	ioat2_cleanup(ioat);
 	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
@@ -291,7 +283,7 @@
 
 void ioat2_timer_event(unsigned long data)
 {
-	struct ioat2_dma_chan *ioat = (void *) data;
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 	struct ioat_chan_common *chan = &ioat->base;
 
 	spin_lock_bh(&chan->cleanup_lock);
@@ -397,10 +389,7 @@
 		if (!ioat)
 			break;
 
-		ioat_init_channel(device, &ioat->base, i,
-				  device->timer_fn,
-				  device->cleanup_tasklet,
-				  (unsigned long) ioat);
+		ioat_init_channel(device, &ioat->base, i);
 		ioat->xfercap_log = xfercap_log;
 		spin_lock_init(&ioat->ring_lock);
 		if (device->reset_hw(&ioat->base)) {
@@ -546,7 +535,6 @@
 	ioat->head = 0;
 	ioat->issued = 0;
 	ioat->tail = 0;
-	ioat->pending = 0;
 	ioat->alloc_order = order;
 	spin_unlock_bh(&ioat->ring_lock);
 
@@ -701,7 +689,7 @@
 
 			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
 			spin_unlock_bh(&chan->cleanup_lock);
-			device->timer_fn((unsigned long) ioat);
+			device->timer_fn((unsigned long) &chan->common);
 		} else
 			spin_unlock_bh(&chan->cleanup_lock);
 		return -ENOMEM;
@@ -785,7 +773,7 @@
 
 	tasklet_disable(&chan->cleanup_task);
 	del_timer_sync(&chan->timer);
-	device->cleanup_tasklet((unsigned long) ioat);
+	device->cleanup_fn((unsigned long) c);
 	device->reset_hw(chan);
 
 	spin_lock_bh(&ioat->ring_lock);
@@ -815,25 +803,9 @@
 
 	chan->last_completion = 0;
 	chan->completion_dma = 0;
-	ioat->pending = 0;
 	ioat->dmacount = 0;
 }
 
-enum dma_status
-ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
-		     dma_cookie_t *done, dma_cookie_t *used)
-{
-	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
-	struct ioatdma_device *device = ioat->base.device;
-
-	if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
-		return DMA_SUCCESS;
-
-	device->cleanup_tasklet((unsigned long) ioat);
-
-	return ioat_is_complete(c, cookie, done, used);
-}
-
 static ssize_t ring_size_show(struct dma_chan *c, char *page)
 {
 	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
@@ -874,7 +846,7 @@
 
 	device->enumerate_channels = ioat2_enumerate_channels;
 	device->reset_hw = ioat2_reset_hw;
-	device->cleanup_tasklet = ioat2_cleanup_tasklet;
+	device->cleanup_fn = ioat2_cleanup_event;
 	device->timer_fn = ioat2_timer_event;
 	device->self_test = ioat_dma_self_test;
 	dma = &device->common;
@@ -882,7 +854,7 @@
 	dma->device_issue_pending = ioat2_issue_pending;
 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
 	dma->device_free_chan_resources = ioat2_free_chan_resources;
-	dma->device_is_tx_complete = ioat2_is_complete;
+	dma->device_is_tx_complete = ioat_is_dma_complete;
 
 	err = ioat_probe(device);
 	if (err)
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index 3afad8d..ef2871f 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -47,7 +47,6 @@
  * @head: allocated index
  * @issued: hardware notification point
  * @tail: cleanup index
- * @pending: lock free indicator for issued != head
  * @dmacount: identical to 'head' except for occasionally resetting to zero
  * @alloc_order: log2 of the number of allocated descriptors
  * @ring: software ring buffer implementation of hardware ring
@@ -61,7 +60,6 @@
 	u16 tail;
 	u16 dmacount;
 	u16 alloc_order;
-	int pending;
 	struct ioat_ring_ent **ring;
 	spinlock_t ring_lock;
 };
@@ -178,12 +176,10 @@
 void ioat2_issue_pending(struct dma_chan *chan);
 int ioat2_alloc_chan_resources(struct dma_chan *c);
 void ioat2_free_chan_resources(struct dma_chan *c);
-enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
-				  dma_cookie_t *done, dma_cookie_t *used);
 void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
 bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
 void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
-void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_cleanup_event(unsigned long data);
 void ioat2_timer_event(unsigned long data);
 int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo);
 int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 9908c9e..26febc5 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -293,17 +293,25 @@
 		}
 	}
 	ioat->tail += i;
-	BUG_ON(!seen_current); /* no active descs have written a completion? */
+	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
 	chan->last_completion = phys_complete;
-	if (ioat->head == ioat->tail) {
+
+	active = ioat2_ring_active(ioat);
+	if (active == 0) {
 		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
 			__func__);
 		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
 		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
 	}
+	/* 5 microsecond delay per pending descriptor */
+	writew(min((5 * active), IOAT_INTRDELAY_MASK),
+	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
 }
 
-static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+/* try to cleanup, but yield (via spin_trylock) to incoming submissions
+ * with the expectation that we will immediately poll again shortly
+ */
+static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
 	unsigned long phys_complete;
@@ -329,29 +337,41 @@
 	spin_unlock_bh(&chan->cleanup_lock);
 }
 
-static void ioat3_cleanup_tasklet(unsigned long data)
+/* run cleanup now because we already delayed the interrupt via INTRDELAY */
+static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
 {
-	struct ioat2_dma_chan *ioat = (void *) data;
+	struct ioat_chan_common *chan = &ioat->base;
+	unsigned long phys_complete;
 
-	ioat3_cleanup(ioat);
-	writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
-	       ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+	prefetch(chan->completion);
+
+	spin_lock_bh(&chan->cleanup_lock);
+	if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+		spin_unlock_bh(&chan->cleanup_lock);
+		return;
+	}
+	spin_lock_bh(&ioat->ring_lock);
+
+	__cleanup(ioat, phys_complete);
+
+	spin_unlock_bh(&ioat->ring_lock);
+	spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_event(unsigned long data)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
+
+	ioat3_cleanup_sync(ioat);
+	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
 static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
 {
 	struct ioat_chan_common *chan = &ioat->base;
 	unsigned long phys_complete;
-	u32 status;
 
-	status = ioat_chansts(chan);
-	if (is_ioat_active(status) || is_ioat_idle(status))
-		ioat_suspend(chan);
-	while (is_ioat_active(status) || is_ioat_idle(status)) {
-		status = ioat_chansts(chan);
-		cpu_relax();
-	}
-
+	ioat2_quiesce(chan, 0);
 	if (ioat_cleanup_preamble(chan, &phys_complete))
 		__cleanup(ioat, phys_complete);
 
@@ -360,7 +380,7 @@
 
 static void ioat3_timer_event(unsigned long data)
 {
-	struct ioat2_dma_chan *ioat = (void *) data;
+	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
 	struct ioat_chan_common *chan = &ioat->base;
 
 	spin_lock_bh(&chan->cleanup_lock);
@@ -426,7 +446,7 @@
 	if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
 		return DMA_SUCCESS;
 
-	ioat3_cleanup(ioat);
+	ioat3_cleanup_poll(ioat);
 
 	return ioat_is_complete(c, cookie, done, used);
 }
@@ -1239,11 +1259,11 @@
 
 	if (is_raid_device) {
 		dma->device_is_tx_complete = ioat3_is_complete;
-		device->cleanup_tasklet = ioat3_cleanup_tasklet;
+		device->cleanup_fn = ioat3_cleanup_event;
 		device->timer_fn = ioat3_timer_event;
 	} else {
-		dma->device_is_tx_complete = ioat2_is_complete;
-		device->cleanup_tasklet = ioat2_cleanup_tasklet;
+		dma->device_is_tx_complete = ioat_is_dma_complete;
+		device->cleanup_fn = ioat2_cleanup_event;
 		device->timer_fn = ioat2_timer_event;
 	}
 
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index e8ae63b..1391798 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -60,7 +60,7 @@
 #define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
 
 #define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
-#define IOAT_INTRDELAY_INT_DELAY_MASK		0x3FFF	/* Interrupt Delay Time */
+#define IOAT_INTRDELAY_MASK			0x3FFF	/* Interrupt Delay Time */
 #define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
 
 #define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index e80bae1..2a44639 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -348,6 +348,7 @@
 		break;
 	case IPU_PIX_FMT_BGRA32:
 	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_ABGR32:
 		params->ip.bpp	= 0;
 		params->ip.pfs	= 4;
 		params->ip.npb	= 7;
@@ -376,20 +377,6 @@
 		params->ip.wid2	= 7;		/* Blue bit width - 1 */
 		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
 		break;
-	case IPU_PIX_FMT_ABGR32:
-		params->ip.bpp	= 0;
-		params->ip.pfs	= 4;
-		params->ip.npb	= 7;
-		params->ip.sat	= 2;		/* SAT = 32-bit access */
-		params->ip.ofs0	= 8;		/* Red bit offset */
-		params->ip.ofs1	= 16;		/* Green bit offset */
-		params->ip.ofs2	= 24;		/* Blue bit offset */
-		params->ip.ofs3	= 0;		/* Alpha bit offset */
-		params->ip.wid0	= 7;		/* Red bit width - 1 */
-		params->ip.wid1	= 7;		/* Green bit width - 1 */
-		params->ip.wid2	= 7;		/* Blue bit width - 1 */
-		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
-		break;
 	case IPU_PIX_FMT_UYVY:
 		params->ip.bpp	= 2;
 		params->ip.pfs	= 6;
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
new file mode 100644
index 0000000..3fdf1f4
--- /dev/null
+++ b/drivers/dma/mpc512x_dma.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009;  for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This is initial version of MPC5121 DMA driver. Only memory to memory
+ * transfers are supported (tested using dmatest module).
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+
+#include <linux/random.h>
+
+/* Number of DMA Transfer descriptors allocated per channel */
+#define MPC_DMA_DESCRIPTORS	64
+
+/* Macro definitions */
+#define MPC_DMA_CHANNELS	64
+#define MPC_DMA_TCD_OFFSET	0x1000
+
+/* Arbitration mode of group and channel */
+#define MPC_DMA_DMACR_EDCG	(1 << 31)
+#define MPC_DMA_DMACR_ERGA	(1 << 3)
+#define MPC_DMA_DMACR_ERCA	(1 << 2)
+
+/* Error codes */
+#define MPC_DMA_DMAES_VLD	(1 << 31)
+#define MPC_DMA_DMAES_GPE	(1 << 15)
+#define MPC_DMA_DMAES_CPE	(1 << 14)
+#define MPC_DMA_DMAES_ERRCHN(err) \
+				(((err) >> 8) & 0x3f)
+#define MPC_DMA_DMAES_SAE	(1 << 7)
+#define MPC_DMA_DMAES_SOE	(1 << 6)
+#define MPC_DMA_DMAES_DAE	(1 << 5)
+#define MPC_DMA_DMAES_DOE	(1 << 4)
+#define MPC_DMA_DMAES_NCE	(1 << 3)
+#define MPC_DMA_DMAES_SGE	(1 << 2)
+#define MPC_DMA_DMAES_SBE	(1 << 1)
+#define MPC_DMA_DMAES_DBE	(1 << 0)
+
+#define MPC_DMA_TSIZE_1		0x00
+#define MPC_DMA_TSIZE_2		0x01
+#define MPC_DMA_TSIZE_4		0x02
+#define MPC_DMA_TSIZE_16	0x04
+#define MPC_DMA_TSIZE_32	0x05
+
+/* MPC5121 DMA engine registers */
+struct __attribute__ ((__packed__)) mpc_dma_regs {
+	/* 0x00 */
+	u32 dmacr;		/* DMA control register */
+	u32 dmaes;		/* DMA error status */
+	/* 0x08 */
+	u32 dmaerqh;		/* DMA enable request high(channels 63~32) */
+	u32 dmaerql;		/* DMA enable request low(channels 31~0) */
+	u32 dmaeeih;		/* DMA enable error interrupt high(ch63~32) */
+	u32 dmaeeil;		/* DMA enable error interrupt low(ch31~0) */
+	/* 0x18 */
+	u8 dmaserq;		/* DMA set enable request */
+	u8 dmacerq;		/* DMA clear enable request */
+	u8 dmaseei;		/* DMA set enable error interrupt */
+	u8 dmaceei;		/* DMA clear enable error interrupt */
+	/* 0x1c */
+	u8 dmacint;		/* DMA clear interrupt request */
+	u8 dmacerr;		/* DMA clear error */
+	u8 dmassrt;		/* DMA set start bit */
+	u8 dmacdne;		/* DMA clear DONE status bit */
+	/* 0x20 */
+	u32 dmainth;		/* DMA interrupt request high(ch63~32) */
+	u32 dmaintl;		/* DMA interrupt request low(ch31~0) */
+	u32 dmaerrh;		/* DMA error high(ch63~32) */
+	u32 dmaerrl;		/* DMA error low(ch31~0) */
+	/* 0x30 */
+	u32 dmahrsh;		/* DMA hw request status high(ch63~32) */
+	u32 dmahrsl;		/* DMA hardware request status low(ch31~0) */
+	u32 dmaihsa;		/* DMA interrupt high select AXE(ch63~32) */
+	u32 dmailsa;		/* DMA interrupt low select AXE(ch31~0) */
+	/* 0x40 ~ 0xff */
+	u32 reserve0[48];	/* Reserved */
+	/* 0x100 */
+	u8 dchpri[MPC_DMA_CHANNELS];
+	/* DMA channels(0~63) priority */
+};
+
+struct __attribute__ ((__packed__)) mpc_dma_tcd {
+	/* 0x00 */
+	u32 saddr;		/* Source address */
+
+	u32 smod:5;		/* Source address modulo */
+	u32 ssize:3;		/* Source data transfer size */
+	u32 dmod:5;		/* Destination address modulo */
+	u32 dsize:3;		/* Destination data transfer size */
+	u32 soff:16;		/* Signed source address offset */
+
+	/* 0x08 */
+	u32 nbytes;		/* Inner "minor" byte count */
+	u32 slast;		/* Last source address adjustment */
+	u32 daddr;		/* Destination address */
+
+	/* 0x14 */
+	u32 citer_elink:1;	/* Enable channel-to-channel linking on
+				 * minor loop complete
+				 */
+	u32 citer_linkch:6;	/* Link channel for minor loop complete */
+	u32 citer:9;		/* Current "major" iteration count */
+	u32 doff:16;		/* Signed destination address offset */
+
+	/* 0x18 */
+	u32 dlast_sga;		/* Last Destination address adjustment/scatter
+				 * gather address
+				 */
+
+	/* 0x1c */
+	u32 biter_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 biter_linkch:6;
+	u32 biter:9;		/* Beginning "major" iteration count */
+	u32 bwc:2;		/* Bandwidth control */
+	u32 major_linkch:6;	/* Link channel number */
+	u32 done:1;		/* Channel done */
+	u32 active:1;		/* Channel active */
+	u32 major_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 e_sg:1;		/* Enable scatter/gather processing */
+	u32 d_req:1;		/* Disable request */
+	u32 int_half:1;		/* Enable an interrupt when major counter is
+				 * half complete
+				 */
+	u32 int_maj:1;		/* Enable an interrupt when major iteration
+				 * count completes
+				 */
+	u32 start:1;		/* Channel start */
+};
+
+struct mpc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+	int				error;
+	struct list_head		node;
+};
+
+struct mpc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+	dma_cookie_t			completed_cookie;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+};
+
+struct mpc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct mpc_dma_chan		channels[MPC_DMA_CHANNELS];
+	struct mpc_dma_regs __iomem	*regs;
+	struct mpc_dma_tcd __iomem	*tcd;
+	int				irq;
+	uint				error_status;
+
+	/* Lock for error_status field in this structure */
+	spinlock_t			error_status_lock;
+};
+
+#define DRV_NAME	"mpc512x_dma"
+
+/* Convert struct dma_chan to struct mpc_dma_chan */
+static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct mpc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct mpc_dma */
+static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c);
+	return container_of(mchan, struct mpc_dma, channels[c->chan_id]);
+}
+
+/*
+ * Execute all queued DMA descriptors.
+ *
+ * Following requirements must be met while calling mpc_dma_execute():
+ * 	a) mchan->lock is acquired,
+ * 	b) mchan->active list is empty,
+ * 	c) mchan->queued list contains at least one entry.
+ */
+static void mpc_dma_execute(struct mpc_dma_chan *mchan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+	struct mpc_dma_desc *first = NULL;
+	struct mpc_dma_desc *prev = NULL;
+	struct mpc_dma_desc *mdesc;
+	int cid = mchan->chan.chan_id;
+
+	/* Move all queued descriptors to active list */
+	list_splice_tail_init(&mchan->queued, &mchan->active);
+
+	/* Chain descriptors into one transaction */
+	list_for_each_entry(mdesc, &mchan->active, node) {
+		if (!first)
+			first = mdesc;
+
+		if (!prev) {
+			prev = mdesc;
+			continue;
+		}
+
+		prev->tcd->dlast_sga = mdesc->tcd_paddr;
+		prev->tcd->e_sg = 1;
+		mdesc->tcd->start = 1;
+
+		prev = mdesc;
+	}
+
+	prev->tcd->start = 0;
+	prev->tcd->int_maj = 1;
+
+	/* Send first descriptor in chain into hardware */
+	memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd));
+	out_8(&mdma->regs->dmassrt, cid);
+}
+
+/* Handle interrupt on one half of DMA controller (32 channels) */
+static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	u32 status = is | es;
+	int ch;
+
+	while ((ch = fls(status) - 1) >= 0) {
+		status &= ~(1 << ch);
+		mchan = &mdma->channels[ch + off];
+
+		spin_lock(&mchan->lock);
+
+		/* Check error status */
+		if (es & (1 << ch))
+			list_for_each_entry(mdesc, &mchan->active, node)
+				mdesc->error = -EIO;
+
+		/* Execute queued descriptors */
+		list_splice_tail_init(&mchan->active, &mchan->completed);
+		if (!list_empty(&mchan->queued))
+			mpc_dma_execute(mchan);
+
+		spin_unlock(&mchan->lock);
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t mpc_dma_irq(int irq, void *data)
+{
+	struct mpc_dma *mdma = data;
+	uint es;
+
+	/* Save error status register */
+	es = in_be32(&mdma->regs->dmaes);
+	spin_lock(&mdma->error_status_lock);
+	if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0)
+		mdma->error_status = es;
+	spin_unlock(&mdma->error_status_lock);
+
+	/* Handle interrupt on each channel */
+	mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
+					in_be32(&mdma->regs->dmaerrh), 32);
+	mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl),
+					in_be32(&mdma->regs->dmaerrl), 0);
+
+	/* Ack interrupt on all channels */
+	out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+	/* Schedule tasklet */
+	tasklet_schedule(&mdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* DMA Tasklet */
+static void mpc_dma_tasklet(unsigned long data)
+{
+	struct mpc_dma *mdma = (void *)data;
+	dma_cookie_t last_cookie = 0;
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	LIST_HEAD(list);
+	uint es;
+	int i;
+
+	spin_lock_irqsave(&mdma->error_status_lock, flags);
+	es = mdma->error_status;
+	mdma->error_status = 0;
+	spin_unlock_irqrestore(&mdma->error_status_lock, flags);
+
+	/* Print nice error report */
+	if (es) {
+		dev_err(mdma->dma.dev,
+			"Hardware reported following error(s) on channel %u:\n",
+						      MPC_DMA_DMAES_ERRCHN(es));
+
+		if (es & MPC_DMA_DMAES_GPE)
+			dev_err(mdma->dma.dev, "- Group Priority Error\n");
+		if (es & MPC_DMA_DMAES_CPE)
+			dev_err(mdma->dma.dev, "- Channel Priority Error\n");
+		if (es & MPC_DMA_DMAES_SAE)
+			dev_err(mdma->dma.dev, "- Source Address Error\n");
+		if (es & MPC_DMA_DMAES_SOE)
+			dev_err(mdma->dma.dev, "- Source Offset"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_DAE)
+			dev_err(mdma->dma.dev, "- Destination Address"
+								" Error\n");
+		if (es & MPC_DMA_DMAES_DOE)
+			dev_err(mdma->dma.dev, "- Destination Offset"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_NCE)
+			dev_err(mdma->dma.dev, "- NBytes/Citter"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SGE)
+			dev_err(mdma->dma.dev, "- Scatter/Gather"
+						" Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SBE)
+			dev_err(mdma->dma.dev, "- Source Bus Error\n");
+		if (es & MPC_DMA_DMAES_DBE)
+			dev_err(mdma->dma.dev, "- Destination Bus Error\n");
+	}
+
+	for (i = 0; i < mdma->dma.chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		if (!list_empty(&mchan->completed))
+			list_splice_tail_init(&mchan->completed, &list);
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		if (list_empty(&list))
+			continue;
+
+		/* Execute callbacks and run dependencies */
+		list_for_each_entry(mdesc, &list, node) {
+			desc = &mdesc->desc;
+
+			if (desc->callback)
+				desc->callback(desc->callback_param);
+
+			last_cookie = desc->cookie;
+			dma_run_dependencies(desc);
+		}
+
+		/* Free descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		list_splice_tail_init(&list, &mchan->free);
+		mchan->completed_cookie = last_cookie;
+		spin_unlock_irqrestore(&mchan->lock, flags);
+	}
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan);
+	struct mpc_dma_desc *mdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	mdesc = container_of(txd, struct mpc_dma_desc, desc);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&mdesc->node, &mchan->queued);
+
+	/* If channel is idle, execute all queued descriptors */
+	if (list_empty(&mchan->active))
+		mpc_dma_execute(mchan);
+
+	/* Update cookie */
+	cookie = mchan->chan.cookie + 1;
+	if (cookie <= 0)
+		cookie = 1;
+
+	mchan->chan.cookie = cookie;
+	mdesc->desc.cookie = cookie;
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return cookie;
+}
+
+/* Alloc channel resources */
+static int mpc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc DMA memory for Transfer Control Descriptors */
+	tcd = dma_alloc_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+							&tcd_paddr, GFP_KERNEL);
+	if (!tcd)
+		return -ENOMEM;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) {
+		mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL);
+		if (!mdesc) {
+			dev_notice(mdma->dma.dev, "Memory allocation error. "
+					"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&mdesc->desc, chan);
+		mdesc->desc.flags = DMA_CTRL_ACK;
+		mdesc->desc.tx_submit = mpc_dma_tx_submit;
+
+		mdesc->tcd = &tcd[i];
+		mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd));
+
+		list_add_tail(&mdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0) {
+		dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	mchan->tcd = tcd;
+	mchan->tcd_paddr = tcd_paddr;
+	list_splice_tail_init(&descs, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Enable Error Interrupt */
+	out_8(&mdma->regs->dmaseei, chan->chan_id);
+
+	return 0;
+}
+
+/* Free channel resources */
+static void mpc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc, *tmp;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&mchan->prepared));
+	BUG_ON(!list_empty(&mchan->queued));
+	BUG_ON(!list_empty(&mchan->active));
+	BUG_ON(!list_empty(&mchan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&mchan->free, &descs);
+	tcd = mchan->tcd;
+	tcd_paddr = mchan->tcd_paddr;
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Free DMA memory used by descriptors */
+	dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(mdesc, tmp, &descs, node)
+		kfree(mdesc);
+
+	/* Disable Error Interrupt */
+	out_8(&mdma->regs->dmaceei, chan->chan_id);
+}
+
+/* Send all pending descriptor to hardware */
+static void mpc_dma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * We are posting descriptors to the hardware as soon as
+	 * they are ready, so this function does nothing.
+	 */
+}
+
+/* Check request completion status */
+static enum dma_status
+mpc_dma_is_tx_complete(struct dma_chan *chan, dma_cookie_t cookie,
+					dma_cookie_t *done, dma_cookie_t *used)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	unsigned long flags;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	last_used = mchan->chan.cookie;
+	last_complete = mchan->completed_cookie;
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	if (done)
+		*done = last_complete;
+
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* Prepare descriptor for memory to memory copy */
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+					size_t len, unsigned long flags)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc,
+									node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	if (!mdesc)
+		return NULL;
+
+	mdesc->error = 0;
+	tcd = mdesc->tcd;
+
+	/* Prepare Transfer Control Descriptor for this transaction */
+	memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+	if (IS_ALIGNED(src | dst | len, 32)) {
+		tcd->ssize = MPC_DMA_TSIZE_32;
+		tcd->dsize = MPC_DMA_TSIZE_32;
+		tcd->soff = 32;
+		tcd->doff = 32;
+	} else if (IS_ALIGNED(src | dst | len, 16)) {
+		tcd->ssize = MPC_DMA_TSIZE_16;
+		tcd->dsize = MPC_DMA_TSIZE_16;
+		tcd->soff = 16;
+		tcd->doff = 16;
+	} else if (IS_ALIGNED(src | dst | len, 4)) {
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+		tcd->soff = 4;
+		tcd->doff = 4;
+	} else if (IS_ALIGNED(src | dst | len, 2)) {
+		tcd->ssize = MPC_DMA_TSIZE_2;
+		tcd->dsize = MPC_DMA_TSIZE_2;
+		tcd->soff = 2;
+		tcd->doff = 2;
+	} else {
+		tcd->ssize = MPC_DMA_TSIZE_1;
+		tcd->dsize = MPC_DMA_TSIZE_1;
+		tcd->soff = 1;
+		tcd->doff = 1;
+	}
+
+	tcd->saddr = src;
+	tcd->daddr = dst;
+	tcd->nbytes = len;
+	tcd->biter = 1;
+	tcd->citer = 1;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return &mdesc->desc;
+}
+
+static int __devinit mpc_dma_probe(struct of_device *op,
+					const struct of_device_id *match)
+{
+	struct device_node *dn = op->node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct mpc_dma *mdma;
+	struct mpc_dma_chan *mchan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	int retval, i;
+
+	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
+	if (!mdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	mdma->irq = irq_of_parse_and_map(dn, 0);
+	if (mdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		return -EINVAL;
+	}
+
+	retval = of_address_to_resource(dn, 0, &res);
+	if (retval) {
+		dev_err(dev, "Error parsing memory region!\n");
+		return retval;
+	}
+
+	regs_start = res.start;
+	regs_size = res.end - res.start + 1;
+
+	if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
+		dev_err(dev, "Error requesting memory region!\n");
+		return -EBUSY;
+	}
+
+	mdma->regs = devm_ioremap(dev, regs_start, regs_size);
+	if (!mdma->regs) {
+		dev_err(dev, "Error mapping memory region!\n");
+		return -ENOMEM;
+	}
+
+	mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
+							+ MPC_DMA_TCD_OFFSET);
+
+	retval = devm_request_irq(dev, mdma->irq, &mpc_dma_irq, 0, DRV_NAME,
+									mdma);
+	if (retval) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		return -EINVAL;
+	}
+
+	spin_lock_init(&mdma->error_status_lock);
+
+	dma = &mdma->dma;
+	dma->dev = dev;
+	dma->chancnt = MPC_DMA_CHANNELS;
+	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
+	dma->device_issue_pending = mpc_dma_issue_pending;
+	dma->device_is_tx_complete = mpc_dma_is_tx_complete;
+	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		mchan->chan.device = dma;
+		mchan->chan.chan_id = i;
+		mchan->chan.cookie = 1;
+		mchan->completed_cookie = mchan->chan.cookie;
+
+		INIT_LIST_HEAD(&mchan->free);
+		INIT_LIST_HEAD(&mchan->prepared);
+		INIT_LIST_HEAD(&mchan->queued);
+		INIT_LIST_HEAD(&mchan->active);
+		INIT_LIST_HEAD(&mchan->completed);
+
+		spin_lock_init(&mchan->lock);
+		list_add_tail(&mchan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&mdma->tasklet, mpc_dma_tasklet, (unsigned long)mdma);
+
+	/*
+	 * Configure DMA Engine:
+	 * - Dynamic clock,
+	 * - Round-robin group arbitration,
+	 * - Round-robin channel arbitration.
+	 */
+	out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
+				MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
+
+	/* Disable hardware DMA requests */
+	out_be32(&mdma->regs->dmaerqh, 0);
+	out_be32(&mdma->regs->dmaerql, 0);
+
+	/* Disable error interrupts */
+	out_be32(&mdma->regs->dmaeeih, 0);
+	out_be32(&mdma->regs->dmaeeil, 0);
+
+	/* Clear interrupts status */
+	out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+	out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+	/* Route interrupts to IPIC */
+	out_be32(&mdma->regs->dmaihsa, 0);
+	out_be32(&mdma->regs->dmailsa, 0);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, mdma);
+	retval = dma_async_device_register(dma);
+	if (retval) {
+		devm_free_irq(dev, mdma->irq, mdma);
+		irq_dispose_mapping(mdma->irq);
+	}
+
+	return retval;
+}
+
+static int __devexit mpc_dma_remove(struct of_device *op)
+{
+	struct device *dev = &op->dev;
+	struct mpc_dma *mdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&mdma->dma);
+	devm_free_irq(dev, mdma->irq, mdma);
+	irq_dispose_mapping(mdma->irq);
+
+	return 0;
+}
+
+static struct of_device_id mpc_dma_match[] = {
+	{ .compatible = "fsl,mpc5121-dma", },
+	{},
+};
+
+static struct of_platform_driver mpc_dma_driver = {
+	.match_table	= mpc_dma_match,
+	.probe		= mpc_dma_probe,
+	.remove		= __devexit_p(mpc_dma_remove),
+	.driver		= {
+		.name	= DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init mpc_dma_init(void)
+{
+	return of_register_platform_driver(&mpc_dma_driver);
+}
+module_init(mpc_dma_init);
+
+static void __exit mpc_dma_exit(void)
+{
+	of_unregister_platform_driver(&mpc_dma_driver);
+}
+module_exit(mpc_dma_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>");
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index 0a3478e..e69d87f 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -4940,7 +4940,7 @@
 	return ret;
 }
 
-static struct of_device_id __devinitdata ppc440spe_adma_of_match[] = {
+static const struct of_device_id ppc440spe_adma_of_match[] __devinitconst = {
 	{ .compatible	= "ibm,dma-440spe", },
 	{ .compatible	= "amcc,xor-accelerator", },
 	{},
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 39c5aa7..abe3f44 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -4,7 +4,7 @@
 
 ccflags-y := -Iinclude/drm
 
-drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
+drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
 		drm_context.o drm_dma.o drm_drawable.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
diff --git a/drivers/gpu/drm/drm_buffer.c b/drivers/gpu/drm/drm_buffer.c
new file mode 100644
index 0000000..55d03ed
--- /dev/null
+++ b/drivers/gpu/drm/drm_buffer.c
@@ -0,0 +1,184 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Pauli Nieminen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ **************************************************************************/
+/*
+ * Multipart buffer for coping data which is larger than the page size.
+ *
+ * Authors:
+ * Pauli Nieminen <suokkos-at-gmail-dot-com>
+ */
+
+#include "drm_buffer.h"
+
+/**
+ * Allocate the drm buffer object.
+ *
+ *   buf: Pointer to a pointer where the object is stored.
+ *   size: The number of bytes to allocate.
+ */
+int drm_buffer_alloc(struct drm_buffer **buf, int size)
+{
+	int nr_pages = size / PAGE_SIZE + 1;
+	int idx;
+
+	/* Allocating pointer table to end of structure makes drm_buffer
+	 * variable sized */
+	*buf = kzalloc(sizeof(struct drm_buffer) + nr_pages*sizeof(char *),
+			GFP_KERNEL);
+
+	if (*buf == NULL) {
+		DRM_ERROR("Failed to allocate drm buffer object to hold"
+				" %d bytes in %d pages.\n",
+				size, nr_pages);
+		return -ENOMEM;
+	}
+
+	(*buf)->size = size;
+
+	for (idx = 0; idx < nr_pages; ++idx) {
+
+		(*buf)->data[idx] =
+			kmalloc(min(PAGE_SIZE, size - idx * PAGE_SIZE),
+				GFP_KERNEL);
+
+
+		if ((*buf)->data[idx] == NULL) {
+			DRM_ERROR("Failed to allocate %dth page for drm"
+					" buffer with %d bytes and %d pages.\n",
+					idx + 1, size, nr_pages);
+			goto error_out;
+		}
+
+	}
+
+	return 0;
+
+error_out:
+
+	/* Only last element can be null pointer so check for it first. */
+	if ((*buf)->data[idx])
+		kfree((*buf)->data[idx]);
+
+	for (--idx; idx >= 0; --idx)
+		kfree((*buf)->data[idx]);
+
+	kfree(*buf);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(drm_buffer_alloc);
+
+/**
+ * Copy the user data to the begin of the buffer and reset the processing
+ * iterator.
+ *
+ *   user_data: A pointer the data that is copied to the buffer.
+ *   size: The Number of bytes to copy.
+ */
+extern int drm_buffer_copy_from_user(struct drm_buffer *buf,
+		void __user *user_data, int size)
+{
+	int nr_pages = size / PAGE_SIZE + 1;
+	int idx;
+
+	if (size > buf->size) {
+		DRM_ERROR("Requesting to copy %d bytes to a drm buffer with"
+				" %d bytes space\n",
+				size, buf->size);
+		return -EFAULT;
+	}
+
+	for (idx = 0; idx < nr_pages; ++idx) {
+
+		if (DRM_COPY_FROM_USER(buf->data[idx],
+			user_data + idx * PAGE_SIZE,
+			min(PAGE_SIZE, size - idx * PAGE_SIZE))) {
+			DRM_ERROR("Failed to copy user data (%p) to drm buffer"
+					" (%p) %dth page.\n",
+					user_data, buf, idx);
+			return -EFAULT;
+
+		}
+	}
+	buf->iterator = 0;
+	return 0;
+}
+EXPORT_SYMBOL(drm_buffer_copy_from_user);
+
+/**
+ * Free the drm buffer object
+ */
+void drm_buffer_free(struct drm_buffer *buf)
+{
+
+	if (buf != NULL) {
+
+		int nr_pages = buf->size / PAGE_SIZE + 1;
+		int idx;
+		for (idx = 0; idx < nr_pages; ++idx)
+			kfree(buf->data[idx]);
+
+		kfree(buf);
+	}
+}
+EXPORT_SYMBOL(drm_buffer_free);
+
+/**
+ * Read an object from buffer that may be split to multiple parts. If object
+ * is not split function just returns the pointer to object in buffer. But in
+ * case of split object data is copied to given stack object that is suplied
+ * by caller.
+ *
+ * The processing location of the buffer is also advanced to the next byte
+ * after the object.
+ *
+ *   objsize: The size of the objet in bytes.
+ *   stack_obj: A pointer to a memory location where object can be copied.
+ */
+void *drm_buffer_read_object(struct drm_buffer *buf,
+		int objsize, void *stack_obj)
+{
+	int idx = drm_buffer_index(buf);
+	int page = drm_buffer_page(buf);
+	void *obj = 0;
+
+	if (idx + objsize <= PAGE_SIZE) {
+		obj = &buf->data[page][idx];
+	} else {
+		/* The object is split which forces copy to temporary object.*/
+		int beginsz = PAGE_SIZE - idx;
+		memcpy(stack_obj, &buf->data[page][idx], beginsz);
+
+		memcpy(stack_obj + beginsz, &buf->data[page + 1][0],
+				objsize - beginsz);
+
+		obj = stack_obj;
+	}
+
+	drm_buffer_advance(buf, objsize);
+	return obj;
+}
+EXPORT_SYMBOL(drm_buffer_read_object);
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 7d0f00a..f2aaf39 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -836,11 +836,7 @@
 			mode_changed = true;
 		} else if (set->fb == NULL) {
 			mode_changed = true;
-		} else if ((set->fb->bits_per_pixel !=
-			 set->crtc->fb->bits_per_pixel) ||
-			 set->fb->depth != set->crtc->fb->depth)
-			fb_changed = true;
-		else
+		} else
 			fb_changed = true;
 	}
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 766c468..f3c58e2 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -125,28 +125,28 @@
 
 	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_update_drawable_info, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, 0),
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_mode_attachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_mode_detachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW)
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_mode_attachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_mode_detachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED)
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index ab6c973..f97e7c4 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -60,8 +60,7 @@
 #define EDID_QUIRK_FIRST_DETAILED_PREFERRED	(1 << 5)
 /* use +hsync +vsync for detailed mode */
 #define EDID_QUIRK_DETAILED_SYNC_PP		(1 << 6)
-/* define the number of Extension EDID block */
-#define MAX_EDID_EXT_NUM 4
+
 
 #define LEVEL_DMT	0
 #define LEVEL_GTF	1
@@ -114,14 +113,14 @@
 };
 
 /**
- * edid_is_valid - sanity check EDID data
+ * drm_edid_is_valid - sanity check EDID data
  * @edid: EDID data
  *
  * Sanity check the EDID block by looking at the header, the version number
  * and the checksum.  Return 0 if the EDID doesn't check out, or 1 if it's
  * valid.
  */
-static bool edid_is_valid(struct edid *edid)
+bool drm_edid_is_valid(struct edid *edid)
 {
 	int i, score = 0;
 	u8 csum = 0;
@@ -163,6 +162,7 @@
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_edid_is_valid);
 
 /**
  * edid_vendor - match a string against EDID's obfuscated vendor field
@@ -1112,8 +1112,8 @@
 	}
 
 	/* Chose real EDID extension number */
-	edid_ext_num = edid->extensions > MAX_EDID_EXT_NUM ?
-		       MAX_EDID_EXT_NUM : edid->extensions;
+	edid_ext_num = edid->extensions > DRM_MAX_EDID_EXT_NUM ?
+		DRM_MAX_EDID_EXT_NUM : edid->extensions;
 
 	/* Find CEA extension */
 	for (i = 0; i < edid_ext_num; i++) {
@@ -1195,7 +1195,7 @@
 	for (i = 0; i < 4; i++) {
 		if (drm_do_probe_ddc_edid(adapter, buf, len))
 			return -1;
-		if (edid_is_valid((struct edid *)buf))
+		if (drm_edid_is_valid((struct edid *)buf))
 			return 0;
 	}
 
@@ -1220,7 +1220,7 @@
 	int ret;
 	struct edid *edid;
 
-	edid = kmalloc(EDID_LENGTH * (MAX_EDID_EXT_NUM + 1),
+	edid = kmalloc(EDID_LENGTH * (DRM_MAX_EDID_EXT_NUM + 1),
 		       GFP_KERNEL);
 	if (edid == NULL) {
 		dev_warn(&connector->dev->pdev->dev,
@@ -1238,14 +1238,14 @@
 	if (edid->extensions != 0) {
 		int edid_ext_num = edid->extensions;
 
-		if (edid_ext_num > MAX_EDID_EXT_NUM) {
+		if (edid_ext_num > DRM_MAX_EDID_EXT_NUM) {
 			dev_warn(&connector->dev->pdev->dev,
 				 "The number of extension(%d) is "
 				 "over max (%d), actually read number (%d)\n",
-				 edid_ext_num, MAX_EDID_EXT_NUM,
-				 MAX_EDID_EXT_NUM);
+				 edid_ext_num, DRM_MAX_EDID_EXT_NUM,
+				 DRM_MAX_EDID_EXT_NUM);
 			/* Reset EDID extension number to be read */
-			edid_ext_num = MAX_EDID_EXT_NUM;
+			edid_ext_num = DRM_MAX_EDID_EXT_NUM;
 		}
 		/* Read EDID including extensions too */
 		ret = drm_ddc_read_edid(connector, adapter, (char *)edid,
@@ -1288,8 +1288,8 @@
 		goto end;
 
 	/* Chose real EDID extension number */
-	edid_ext_num = edid->extensions > MAX_EDID_EXT_NUM ?
-		       MAX_EDID_EXT_NUM : edid->extensions;
+	edid_ext_num = edid->extensions > DRM_MAX_EDID_EXT_NUM ?
+		       DRM_MAX_EDID_EXT_NUM : edid->extensions;
 
 	/* Find CEA extension */
 	for (i = 0; i < edid_ext_num; i++) {
@@ -1346,7 +1346,7 @@
 	if (edid == NULL) {
 		return 0;
 	}
-	if (!edid_is_valid(edid)) {
+	if (!drm_edid_is_valid(edid)) {
 		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
 			 drm_get_connector_name(connector));
 		return 0;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0f9e9055..5054970 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -27,6 +27,7 @@
  *      Dave Airlie <airlied@linux.ie>
  *      Jesse Barnes <jesse.barnes@intel.com>
  */
+#include <linux/kernel.h>
 #include <linux/sysrq.h>
 #include <linux/fb.h>
 #include "drmP.h"
@@ -50,21 +51,6 @@
 }
 EXPORT_SYMBOL(drm_fb_helper_add_connector);
 
-static int my_atoi(const char *name)
-{
-	int val = 0;
-
-	for (;; name++) {
-		switch (*name) {
-		case '0' ... '9':
-			val = 10*val+(*name-'0');
-			break;
-		default:
-			return val;
-		}
-	}
-}
-
 /**
  * drm_fb_helper_connector_parse_command_line - parse command line for connector
  * @connector - connector to parse line for
@@ -111,7 +97,7 @@
 			namelen = i;
 			if (!refresh_specified && !bpp_specified &&
 			    !yres_specified) {
-				refresh = my_atoi(&name[i+1]);
+				refresh = simple_strtol(&name[i+1], NULL, 10);
 				refresh_specified = 1;
 				if (cvt || rb)
 					cvt = 0;
@@ -121,7 +107,7 @@
 		case '-':
 			namelen = i;
 			if (!bpp_specified && !yres_specified) {
-				bpp = my_atoi(&name[i+1]);
+				bpp = simple_strtol(&name[i+1], NULL, 10);
 				bpp_specified = 1;
 				if (cvt || rb)
 					cvt = 0;
@@ -130,7 +116,7 @@
 			break;
 		case 'x':
 			if (!yres_specified) {
-				yres = my_atoi(&name[i+1]);
+				yres = simple_strtol(&name[i+1], NULL, 10);
 				yres_specified = 1;
 			} else
 				goto done;
@@ -170,7 +156,7 @@
 		}
 	}
 	if (i < 0 && yres_specified) {
-		xres = my_atoi(name);
+		xres = simple_strtol(name, NULL, 10);
 		res_specified = 1;
 	}
 done:
@@ -694,7 +680,7 @@
 	int i;
 
 	if (var->pixclock != 0) {
-		DRM_ERROR("PIXEL CLCOK SET\n");
+		DRM_ERROR("PIXEL CLOCK SET\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 8bf3770..aa89d4b 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -192,9 +192,7 @@
 	idr_remove(&filp->object_idr, handle);
 	spin_unlock(&filp->table_lock);
 
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_handle_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(obj);
 
 	return 0;
 }
@@ -325,9 +323,7 @@
 	}
 
 err:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	return ret;
 }
 
@@ -358,9 +354,7 @@
 		return -ENOENT;
 
 	ret = drm_gem_handle_create(file_priv, obj, &handle);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	if (ret)
 		return ret;
 
@@ -390,7 +384,7 @@
 {
 	struct drm_gem_object *obj = ptr;
 
-	drm_gem_object_handle_unreference(obj);
+	drm_gem_object_handle_unreference_unlocked(obj);
 
 	return 0;
 }
@@ -403,16 +397,25 @@
 void
 drm_gem_release(struct drm_device *dev, struct drm_file *file_private)
 {
-	mutex_lock(&dev->struct_mutex);
 	idr_for_each(&file_private->object_idr,
 		     &drm_gem_object_release_handle, NULL);
 
 	idr_destroy(&file_private->object_idr);
-	mutex_unlock(&dev->struct_mutex);
+}
+
+static void
+drm_gem_object_free_common(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	fput(obj->filp);
+	atomic_dec(&dev->object_count);
+	atomic_sub(obj->size, &dev->object_memory);
+	kfree(obj);
 }
 
 /**
  * Called after the last reference to the object has been lost.
+ * Must be called holding struct_ mutex
  *
  * Frees the object
  */
@@ -427,14 +430,40 @@
 	if (dev->driver->gem_free_object != NULL)
 		dev->driver->gem_free_object(obj);
 
-	fput(obj->filp);
-	atomic_dec(&dev->object_count);
-	atomic_sub(obj->size, &dev->object_memory);
-	kfree(obj);
+	drm_gem_object_free_common(obj);
 }
 EXPORT_SYMBOL(drm_gem_object_free);
 
 /**
+ * Called after the last reference to the object has been lost.
+ * Must be called without holding struct_mutex
+ *
+ * Frees the object
+ */
+void
+drm_gem_object_free_unlocked(struct kref *kref)
+{
+	struct drm_gem_object *obj = (struct drm_gem_object *) kref;
+	struct drm_device *dev = obj->dev;
+
+	if (dev->driver->gem_free_object_unlocked != NULL)
+		dev->driver->gem_free_object_unlocked(obj);
+	else if (dev->driver->gem_free_object != NULL) {
+		mutex_lock(&dev->struct_mutex);
+		dev->driver->gem_free_object(obj);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	drm_gem_object_free_common(obj);
+}
+EXPORT_SYMBOL(drm_gem_object_free_unlocked);
+
+static void drm_gem_object_ref_bug(struct kref *list_kref)
+{
+	BUG();
+}
+
+/**
  * Called after the last handle to the object has been closed
  *
  * Removes any name for the object. Note that this must be
@@ -458,8 +487,10 @@
 		/*
 		 * The object name held a reference to this object, drop
 		 * that now.
+		*
+		* This cannot be the last reference, since the handle holds one too.
 		 */
-		drm_gem_object_unreference(obj);
+		kref_put(&obj->refcount, drm_gem_object_ref_bug);
 	} else
 		spin_unlock(&dev->object_name_lock);
 
@@ -477,11 +508,8 @@
 void drm_gem_vm_close(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
-	struct drm_device *dev = obj->dev;
 
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 }
 EXPORT_SYMBOL(drm_gem_vm_close);
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a894ade..1376dfe 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -162,7 +162,7 @@
 	struct drm_device *dev = node->minor->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
-	if (!IS_IRONLAKE(dev)) {
+	if (!HAS_PCH_SPLIT(dev)) {
 		seq_printf(m, "Interrupt enable:    %08x\n",
 			   I915_READ(IER));
 		seq_printf(m, "Interrupt identity:  %08x\n",
@@ -350,6 +350,36 @@
 	return 0;
 }
 
+static const char *pin_flag(int pinned)
+{
+	if (pinned > 0)
+		return " P";
+	else if (pinned < 0)
+		return " p";
+	else
+		return "";
+}
+
+static const char *tiling_flag(int tiling)
+{
+	switch (tiling) {
+	default:
+	case I915_TILING_NONE: return "";
+	case I915_TILING_X: return " X";
+	case I915_TILING_Y: return " Y";
+	}
+}
+
+static const char *dirty_flag(int dirty)
+{
+	return dirty ? " dirty" : "";
+}
+
+static const char *purgeable_flag(int purgeable)
+{
+	return purgeable ? " purgeable" : "";
+}
+
 static int i915_error_state(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -357,6 +387,7 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error;
 	unsigned long flags;
+	int i, page, offset, elt;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
 	if (!dev_priv->first_error) {
@@ -368,6 +399,7 @@
 
 	seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
 		   error->time.tv_usec);
+	seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "  PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
@@ -379,6 +411,59 @@
 		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
 		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
 	}
+	seq_printf(m, "seqno: 0x%08x\n", error->seqno);
+
+	if (error->active_bo_count) {
+		seq_printf(m, "Buffers [%d]:\n", error->active_bo_count);
+
+		for (i = 0; i < error->active_bo_count; i++) {
+			seq_printf(m, "  %08x %8zd %08x %08x %08x%s%s%s%s",
+				   error->active_bo[i].gtt_offset,
+				   error->active_bo[i].size,
+				   error->active_bo[i].read_domains,
+				   error->active_bo[i].write_domain,
+				   error->active_bo[i].seqno,
+				   pin_flag(error->active_bo[i].pinned),
+				   tiling_flag(error->active_bo[i].tiling),
+				   dirty_flag(error->active_bo[i].dirty),
+				   purgeable_flag(error->active_bo[i].purgeable));
+
+			if (error->active_bo[i].name)
+				seq_printf(m, " (name: %d)", error->active_bo[i].name);
+			if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE)
+				seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg);
+
+			seq_printf(m, "\n");
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(error->batchbuffer); i++) {
+		if (error->batchbuffer[i]) {
+			struct drm_i915_error_object *obj = error->batchbuffer[i];
+
+			seq_printf(m, "--- gtt_offset = 0x%08x\n", obj->gtt_offset);
+			offset = 0;
+			for (page = 0; page < obj->page_count; page++) {
+				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+					seq_printf(m, "%08x :  %08x\n", offset, obj->pages[page][elt]);
+					offset += 4;
+				}
+			}
+		}
+	}
+
+	if (error->ringbuffer) {
+		struct drm_i915_error_object *obj = error->ringbuffer;
+
+		seq_printf(m, "--- ringbuffer = 0x%08x\n", obj->gtt_offset);
+		offset = 0;
+		for (page = 0; page < obj->page_count; page++) {
+			for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+				seq_printf(m, "%08x :  %08x\n", offset, obj->pages[page][elt]);
+				offset += 4;
+			}
+		}
+	}
 
 out:
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
@@ -386,6 +471,165 @@
 	return 0;
 }
 
+static int i915_rstdby_delays(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u16 crstanddelay = I915_READ16(CRSTANDVID);
+
+	seq_printf(m, "w/ctx: %d, w/o ctx: %d\n", (crstanddelay >> 8) & 0x3f, (crstanddelay & 0x3f));
+
+	return 0;
+}
+
+static int i915_cur_delayinfo(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u16 rgvswctl = I915_READ16(MEMSWCTL);
+
+	seq_printf(m, "Last command: 0x%01x\n", (rgvswctl >> 13) & 0x3);
+	seq_printf(m, "Command status: %d\n", (rgvswctl >> 12) & 1);
+	seq_printf(m, "P%d DELAY 0x%02x\n", (rgvswctl >> 8) & 0xf,
+		   rgvswctl & 0x3f);
+
+	return 0;
+}
+
+static int i915_delayfreq_table(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 delayfreq;
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		delayfreq = I915_READ(PXVFREQ_BASE + i * 4);
+		seq_printf(m, "P%02dVIDFREQ: 0x%08x\n", i, delayfreq);
+	}
+
+	return 0;
+}
+
+static inline int MAP_TO_MV(int map)
+{
+	return 1250 - (map * 25);
+}
+
+static int i915_inttoext_table(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 inttoext;
+	int i;
+
+	for (i = 1; i <= 32; i++) {
+		inttoext = I915_READ(INTTOEXT_BASE_ILK + i * 4);
+		seq_printf(m, "INTTOEXT%02d: 0x%08x\n", i, inttoext);
+	}
+
+	return 0;
+}
+
+static int i915_drpc_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 rgvmodectl = I915_READ(MEMMODECTL);
+
+	seq_printf(m, "HD boost: %s\n", (rgvmodectl & MEMMODE_BOOST_EN) ?
+		   "yes" : "no");
+	seq_printf(m, "Boost freq: %d\n",
+		   (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
+		   MEMMODE_BOOST_FREQ_SHIFT);
+	seq_printf(m, "HW control enabled: %s\n",
+		   rgvmodectl & MEMMODE_HWIDLE_EN ? "yes" : "no");
+	seq_printf(m, "SW control enabled: %s\n",
+		   rgvmodectl & MEMMODE_SWMODE_EN ? "yes" : "no");
+	seq_printf(m, "Gated voltage change: %s\n",
+		   rgvmodectl & MEMMODE_RCLK_GATE ? "yes" : "no");
+	seq_printf(m, "Starting frequency: P%d\n",
+		   (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
+	seq_printf(m, "Max frequency: P%d\n",
+		   (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
+	seq_printf(m, "Min frequency: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
+
+	return 0;
+}
+
+static int i915_fbc_status(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_crtc *crtc;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	bool fbc_enabled = false;
+
+	if (!dev_priv->display.fbc_enabled) {
+		seq_printf(m, "FBC unsupported on this chipset\n");
+		return 0;
+	}
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (!crtc->enabled)
+			continue;
+		if (dev_priv->display.fbc_enabled(crtc))
+			fbc_enabled = true;
+	}
+
+	if (fbc_enabled) {
+		seq_printf(m, "FBC enabled\n");
+	} else {
+		seq_printf(m, "FBC disabled: ");
+		switch (dev_priv->no_fbc_reason) {
+		case FBC_STOLEN_TOO_SMALL:
+			seq_printf(m, "not enough stolen memory");
+			break;
+		case FBC_UNSUPPORTED_MODE:
+			seq_printf(m, "mode not supported");
+			break;
+		case FBC_MODE_TOO_LARGE:
+			seq_printf(m, "mode too large");
+			break;
+		case FBC_BAD_PLANE:
+			seq_printf(m, "FBC unsupported on plane");
+			break;
+		case FBC_NOT_TILED:
+			seq_printf(m, "scanout buffer not tiled");
+			break;
+		default:
+			seq_printf(m, "unknown reason");
+		}
+		seq_printf(m, "\n");
+	}
+	return 0;
+}
+
+static int i915_sr_status(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	bool sr_enabled = false;
+
+	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev))
+		sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
+	else if (IS_I915GM(dev))
+		sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
+	else if (IS_PINEVIEW(dev))
+		sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
+
+	seq_printf(m, "self-refresh: %s\n", sr_enabled ? "enabled" :
+		   "disabled");
+
+	return 0;
+}
+
 static int
 i915_wedged_open(struct inode *inode,
 		 struct file *filp)
@@ -503,6 +747,13 @@
 	{"i915_ringbuffer_info", i915_ringbuffer_info, 0},
 	{"i915_batchbuffers", i915_batchbuffer_info, 0},
 	{"i915_error_state", i915_error_state, 0},
+	{"i915_rstdby_delays", i915_rstdby_delays, 0},
+	{"i915_cur_delayinfo", i915_cur_delayinfo, 0},
+	{"i915_delayfreq_table", i915_delayfreq_table, 0},
+	{"i915_inttoext_table", i915_inttoext_table, 0},
+	{"i915_drpc_info", i915_drpc_info, 0},
+	{"i915_fbc_status", i915_fbc_status, 0},
+	{"i915_sr_status", i915_sr_status, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 2307f98..8bfc0bb 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -35,6 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include <linux/vgaarb.h>
+#include <linux/acpi.h>
+#include <linux/pnp.h>
+#include <linux/vga_switcheroo.h>
 
 /* Really want an OS-independent resettable timer.  Would like to have
  * this loop run for (eg) 3 sec, but have the timer reset every time
@@ -933,6 +936,120 @@
 	return 0;
 }
 
+#define MCHBAR_I915 0x44
+#define MCHBAR_I965 0x48
+#define MCHBAR_SIZE (4*4096)
+
+#define DEVEN_REG 0x54
+#define   DEVEN_MCHBAR_EN (1 << 28)
+
+/* Allocate space for the MCH regs if needed, return nonzero on error */
+static int
+intel_alloc_mchbar_resource(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp_lo, temp_hi = 0;
+	u64 mchbar_addr;
+	int ret = 0;
+
+	if (IS_I965G(dev))
+		pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi);
+	pci_read_config_dword(dev_priv->bridge_dev, reg, &temp_lo);
+	mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
+
+	/* If ACPI doesn't have it, assume we need to allocate it ourselves */
+#ifdef CONFIG_PNP
+	if (mchbar_addr &&
+	    pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE)) {
+		ret = 0;
+		goto out;
+	}
+#endif
+
+	/* Get some space for it */
+	ret = pci_bus_alloc_resource(dev_priv->bridge_dev->bus, &dev_priv->mch_res,
+				     MCHBAR_SIZE, MCHBAR_SIZE,
+				     PCIBIOS_MIN_MEM,
+				     0,   pcibios_align_resource,
+				     dev_priv->bridge_dev);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed bus alloc: %d\n", ret);
+		dev_priv->mch_res.start = 0;
+		goto out;
+	}
+
+	if (IS_I965G(dev))
+		pci_write_config_dword(dev_priv->bridge_dev, reg + 4,
+				       upper_32_bits(dev_priv->mch_res.start));
+
+	pci_write_config_dword(dev_priv->bridge_dev, reg,
+			       lower_32_bits(dev_priv->mch_res.start));
+out:
+	return ret;
+}
+
+/* Setup MCHBAR if possible, return true if we should disable it again */
+static void
+intel_setup_mchbar(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp;
+	bool enabled;
+
+	dev_priv->mchbar_need_disable = false;
+
+	if (IS_I915G(dev) || IS_I915GM(dev)) {
+		pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
+		enabled = !!(temp & DEVEN_MCHBAR_EN);
+	} else {
+		pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
+		enabled = temp & 1;
+	}
+
+	/* If it's already enabled, don't have to do anything */
+	if (enabled)
+		return;
+
+	if (intel_alloc_mchbar_resource(dev))
+		return;
+
+	dev_priv->mchbar_need_disable = true;
+
+	/* Space is allocated or reserved, so enable it. */
+	if (IS_I915G(dev) || IS_I915GM(dev)) {
+		pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG,
+				       temp | DEVEN_MCHBAR_EN);
+	} else {
+		pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
+		pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp | 1);
+	}
+}
+
+static void
+intel_teardown_mchbar(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
+	u32 temp;
+
+	if (dev_priv->mchbar_need_disable) {
+		if (IS_I915G(dev) || IS_I915GM(dev)) {
+			pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
+			temp &= ~DEVEN_MCHBAR_EN;
+			pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG, temp);
+		} else {
+			pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
+			temp &= ~1;
+			pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp);
+		}
+	}
+
+	if (dev_priv->mch_res.start)
+		release_resource(&dev_priv->mch_res);
+}
+
 /**
  * i915_probe_agp - get AGP bootup configuration
  * @pdev: PCI device
@@ -978,59 +1095,123 @@
 	 * Some of the preallocated space is taken by the GTT
 	 * and popup.  GTT is 1K per MB of aperture size, and popup is 4K.
 	 */
-	if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev))
+	if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev))
 		overhead = 4096;
 	else
 		overhead = (*aperture_size / 1024) + 4096;
 
-	switch (tmp & INTEL_GMCH_GMS_MASK) {
-	case INTEL_855_GMCH_GMS_DISABLED:
-		DRM_ERROR("video memory is disabled\n");
-		return -1;
-	case INTEL_855_GMCH_GMS_STOLEN_1M:
-		stolen = 1 * 1024 * 1024;
-		break;
-	case INTEL_855_GMCH_GMS_STOLEN_4M:
-		stolen = 4 * 1024 * 1024;
-		break;
-	case INTEL_855_GMCH_GMS_STOLEN_8M:
-		stolen = 8 * 1024 * 1024;
-		break;
-	case INTEL_855_GMCH_GMS_STOLEN_16M:
-		stolen = 16 * 1024 * 1024;
-		break;
-	case INTEL_855_GMCH_GMS_STOLEN_32M:
-		stolen = 32 * 1024 * 1024;
-		break;
-	case INTEL_915G_GMCH_GMS_STOLEN_48M:
-		stolen = 48 * 1024 * 1024;
-		break;
-	case INTEL_915G_GMCH_GMS_STOLEN_64M:
-		stolen = 64 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_128M:
-		stolen = 128 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_256M:
-		stolen = 256 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_96M:
-		stolen = 96 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_160M:
-		stolen = 160 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_224M:
-		stolen = 224 * 1024 * 1024;
-		break;
-	case INTEL_GMCH_GMS_STOLEN_352M:
-		stolen = 352 * 1024 * 1024;
-		break;
-	default:
-		DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n",
-			tmp & INTEL_GMCH_GMS_MASK);
-		return -1;
+	if (IS_GEN6(dev)) {
+		/* SNB has memory control reg at 0x50.w */
+		pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &tmp);
+
+		switch (tmp & SNB_GMCH_GMS_STOLEN_MASK) {
+		case INTEL_855_GMCH_GMS_DISABLED:
+			DRM_ERROR("video memory is disabled\n");
+			return -1;
+		case SNB_GMCH_GMS_STOLEN_32M:
+			stolen = 32 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_64M:
+			stolen = 64 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_96M:
+			stolen = 96 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_128M:
+			stolen = 128 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_160M:
+			stolen = 160 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_192M:
+			stolen = 192 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_224M:
+			stolen = 224 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_256M:
+			stolen = 256 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_288M:
+			stolen = 288 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_320M:
+			stolen = 320 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_352M:
+			stolen = 352 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_384M:
+			stolen = 384 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_416M:
+			stolen = 416 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_448M:
+			stolen = 448 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_480M:
+			stolen = 480 * 1024 * 1024;
+			break;
+		case SNB_GMCH_GMS_STOLEN_512M:
+			stolen = 512 * 1024 * 1024;
+			break;
+		default:
+			DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n",
+				  tmp & SNB_GMCH_GMS_STOLEN_MASK);
+			return -1;
+		}
+	} else {
+		switch (tmp & INTEL_GMCH_GMS_MASK) {
+		case INTEL_855_GMCH_GMS_DISABLED:
+			DRM_ERROR("video memory is disabled\n");
+			return -1;
+		case INTEL_855_GMCH_GMS_STOLEN_1M:
+			stolen = 1 * 1024 * 1024;
+			break;
+		case INTEL_855_GMCH_GMS_STOLEN_4M:
+			stolen = 4 * 1024 * 1024;
+			break;
+		case INTEL_855_GMCH_GMS_STOLEN_8M:
+			stolen = 8 * 1024 * 1024;
+			break;
+		case INTEL_855_GMCH_GMS_STOLEN_16M:
+			stolen = 16 * 1024 * 1024;
+			break;
+		case INTEL_855_GMCH_GMS_STOLEN_32M:
+			stolen = 32 * 1024 * 1024;
+			break;
+		case INTEL_915G_GMCH_GMS_STOLEN_48M:
+			stolen = 48 * 1024 * 1024;
+			break;
+		case INTEL_915G_GMCH_GMS_STOLEN_64M:
+			stolen = 64 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_128M:
+			stolen = 128 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_256M:
+			stolen = 256 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_96M:
+			stolen = 96 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_160M:
+			stolen = 160 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_224M:
+			stolen = 224 * 1024 * 1024;
+			break;
+		case INTEL_GMCH_GMS_STOLEN_352M:
+			stolen = 352 * 1024 * 1024;
+			break;
+		default:
+			DRM_ERROR("unexpected GMCH_GMS value: 0x%02x\n",
+				  tmp & INTEL_GMCH_GMS_MASK);
+			return -1;
+		}
 	}
+
 	*preallocated_size = stolen - overhead;
 	*start = overhead;
 
@@ -1064,7 +1245,7 @@
 	int gtt_offset, gtt_size;
 
 	if (IS_I965G(dev)) {
-		if (IS_G4X(dev) || IS_IRONLAKE(dev)) {
+		if (IS_G4X(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) {
 			gtt_offset = 2*1024*1024;
 			gtt_size = 2*1024*1024;
 		} else {
@@ -1133,6 +1314,7 @@
 	/* Leave 1M for line length buffer & misc. */
 	compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
 	if (!compressed_fb) {
+		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
 		i915_warn_stolen(dev);
 		return;
 	}
@@ -1140,6 +1322,7 @@
 	compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
 	if (!compressed_fb) {
 		i915_warn_stolen(dev);
+		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
 		return;
 	}
 
@@ -1199,6 +1382,32 @@
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
+static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_INFO "i915: switched off\n");
+		/* i915 resume handler doesn't set to D0 */
+		pci_set_power_state(dev->pdev, PCI_D0);
+		i915_resume(dev);
+	} else {
+		printk(KERN_ERR "i915: switched off\n");
+		i915_suspend(dev, pmm);
+	}
+}
+
+static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
 static int i915_load_modeset_init(struct drm_device *dev,
 				  unsigned long prealloc_start,
 				  unsigned long prealloc_size,
@@ -1260,6 +1469,12 @@
 	if (ret)
 		goto destroy_ringbuffer;
 
+	ret = vga_switcheroo_register_client(dev->pdev,
+					     i915_switcheroo_set_state,
+					     i915_switcheroo_can_switch);
+	if (ret)
+		goto destroy_ringbuffer;
+
 	intel_modeset_init(dev);
 
 	ret = drm_irq_install(dev);
@@ -1281,7 +1496,9 @@
 	return 0;
 
 destroy_ringbuffer:
+	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
+	mutex_unlock(&dev->struct_mutex);
 out:
 	return ret;
 }
@@ -1445,11 +1662,14 @@
 
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
-	if (IS_G4X(dev) || IS_IRONLAKE(dev)) {
+	if (IS_G4X(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) {
 		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
 		dev->driver->get_vblank_counter = gm45_get_vblank_counter;
 	}
 
+	/* Try to make sure MCHBAR is enabled before poking at it */
+	intel_setup_mchbar(dev);
+
 	i915_gem_load(dev);
 
 	/* Init HWS */
@@ -1523,6 +1743,8 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	i915_destroy_error_state(dev);
+
 	destroy_workqueue(dev_priv->wq);
 	del_timer_sync(&dev_priv->hangcheck_timer);
 
@@ -1544,6 +1766,7 @@
 			dev_priv->child_dev_num = 0;
 		}
 		drm_irq_uninstall(dev);
+		vga_switcheroo_unregister_client(dev->pdev);
 		vga_client_register(dev->pdev, NULL, NULL, NULL);
 	}
 
@@ -1569,6 +1792,8 @@
 		intel_cleanup_overlay(dev);
 	}
 
+	intel_teardown_mchbar(dev);
+
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev->dev_private);
 
@@ -1611,6 +1836,7 @@
 
 	if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) {
 		drm_fb_helper_restore();
+		vga_switcheroo_process_delayed_switch();
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cf4cb3e..1b2e954 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@
 module_param_named(lvds_downclock, i915_lvds_downclock, int, 0400);
 
 static struct drm_driver driver;
+extern int intel_agp_enabled;
 
 #define INTEL_VGA_DEVICE(id, info) {		\
 	.class = PCI_CLASS_DISPLAY_VGA << 8,	\
@@ -136,6 +137,16 @@
 	.has_hotplug = 1,
 };
 
+const static struct intel_device_info intel_sandybridge_d_info = {
+	.is_i965g = 1, .is_i9xx = 1, .need_gfx_hws = 1,
+	.has_hotplug = 1,
+};
+
+const static struct intel_device_info intel_sandybridge_m_info = {
+	.is_i965g = 1, .is_mobile = 1, .is_i9xx = 1, .need_gfx_hws = 1,
+	.has_hotplug = 1,
+};
+
 const static struct pci_device_id pciidlist[] = {
 	INTEL_VGA_DEVICE(0x3577, &intel_i830_info),
 	INTEL_VGA_DEVICE(0x2562, &intel_845g_info),
@@ -167,6 +178,8 @@
 	INTEL_VGA_DEVICE(0xa011, &intel_pineview_info),
 	INTEL_VGA_DEVICE(0x0042, &intel_ironlake_d_info),
 	INTEL_VGA_DEVICE(0x0046, &intel_ironlake_m_info),
+	INTEL_VGA_DEVICE(0x0102, &intel_sandybridge_d_info),
+	INTEL_VGA_DEVICE(0x0106, &intel_sandybridge_m_info),
 	{0, 0, 0}
 };
 
@@ -201,7 +214,7 @@
 	return 0;
 }
 
-static int i915_suspend(struct drm_device *dev, pm_message_t state)
+int i915_suspend(struct drm_device *dev, pm_message_t state)
 {
 	int error;
 
@@ -255,7 +268,7 @@
 	return error;
 }
 
-static int i915_resume(struct drm_device *dev)
+int i915_resume(struct drm_device *dev)
 {
 	if (pci_enable_device(dev->pdev))
 		return -EIO;
@@ -546,6 +559,11 @@
 
 static int __init i915_init(void)
 {
+	if (!intel_agp_enabled) {
+		DRM_ERROR("drm/i915 can't work without intel_agp module!\n");
+		return -ENODEV;
+	}
+
 	driver.num_ioctls = i915_max_ioctl;
 
 	i915_gem_shrinker_init();
@@ -571,6 +589,11 @@
 		driver.driver_features &= ~DRIVER_MODESET;
 #endif
 
+	if (!(driver.driver_features & DRIVER_MODESET)) {
+		driver.suspend = i915_suspend;
+		driver.resume = i915_resume;
+	}
+
 	return drm_init(&driver);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b99b6a8..979439c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -150,7 +150,27 @@
 	u32 instps;
 	u32 instdone1;
 	u32 seqno;
+	u64 bbaddr;
 	struct timeval time;
+	struct drm_i915_error_object {
+		int page_count;
+		u32 gtt_offset;
+		u32 *pages[0];
+	} *ringbuffer, *batchbuffer[2];
+	struct drm_i915_error_buffer {
+		size_t size;
+		u32 name;
+		u32 seqno;
+		u32 gtt_offset;
+		u32 read_domains;
+		u32 write_domain;
+		u32 fence_reg;
+		s32 pinned:2;
+		u32 tiling:2;
+		u32 dirty:1;
+		u32 purgeable:1;
+	} *active_bo;
+	u32 active_bo_count;
 };
 
 struct drm_i915_display_funcs {
@@ -192,6 +212,14 @@
 	u8 cursor_needs_physical : 1;
 };
 
+enum no_fbc_reason {
+	FBC_STOLEN_TOO_SMALL, /* not enough space to hold compressed buffers */
+	FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
+	FBC_MODE_TOO_LARGE, /* mode too large for compression */
+	FBC_BAD_PLANE, /* fbc not supported on plane */
+	FBC_NOT_TILED, /* buffer not tiled */
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -452,6 +480,7 @@
 	u32 savePIPEB_DATA_N1;
 	u32 savePIPEB_LINK_M1;
 	u32 savePIPEB_LINK_N1;
+	u32 saveMCHBAR_RENDER_STANDBY;
 
 	struct {
 		struct drm_mm gtt_space;
@@ -590,6 +619,14 @@
 	int child_dev_num;
 	struct child_device_config *child_dev;
 	struct drm_connector *int_lvds_connector;
+
+	bool mchbar_need_disable;
+
+	u8 cur_delay;
+	u8 min_delay;
+	u8 max_delay;
+
+	enum no_fbc_reason no_fbc_reason;
 } drm_i915_private_t;
 
 /** driver private structure attached to each drm_gem_object */
@@ -736,6 +773,8 @@
 extern unsigned int i915_powersave;
 extern unsigned int i915_lvds_downclock;
 
+extern int i915_suspend(struct drm_device *dev, pm_message_t state);
+extern int i915_resume(struct drm_device *dev);
 extern void i915_save_display(struct drm_device *dev);
 extern void i915_restore_display(struct drm_device *dev);
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
@@ -761,6 +800,7 @@
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
+void i915_destroy_error_state(struct drm_device *dev);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,
@@ -897,7 +937,8 @@
 void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);
 bool i915_tiling_ok(struct drm_device *dev, int stride, int size,
 		    int tiling_mode);
-bool i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj);
+bool i915_gem_object_fence_offset_ok(struct drm_gem_object *obj,
+				     int tiling_mode);
 
 /* i915_gem_debug.c */
 void i915_gem_dump_object(struct drm_gem_object *obj, int len,
@@ -1026,7 +1067,7 @@
 #define IS_845G(dev)		((dev)->pci_device == 0x2562)
 #define IS_I85X(dev)		((dev)->pci_device == 0x3582)
 #define IS_I865G(dev)		((dev)->pci_device == 0x2572)
-#define IS_I8XX(dev)		(INTEL_INFO(dev)->is_i8xx)
+#define IS_GEN2(dev)		(INTEL_INFO(dev)->is_i8xx)
 #define IS_I915G(dev)		(INTEL_INFO(dev)->is_i915g)
 #define IS_I915GM(dev)		((dev)->pci_device == 0x2592)
 #define IS_I945G(dev)		((dev)->pci_device == 0x2772)
@@ -1045,8 +1086,29 @@
 #define IS_I9XX(dev)		(INTEL_INFO(dev)->is_i9xx)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 
+#define IS_GEN3(dev)	(IS_I915G(dev) ||			\
+			 IS_I915GM(dev) ||			\
+			 IS_I945G(dev) ||			\
+			 IS_I945GM(dev) ||			\
+			 IS_G33(dev) || \
+			 IS_PINEVIEW(dev))
+#define IS_GEN4(dev)	((dev)->pci_device == 0x2972 ||		\
+			 (dev)->pci_device == 0x2982 ||		\
+			 (dev)->pci_device == 0x2992 ||		\
+			 (dev)->pci_device == 0x29A2 ||		\
+			 (dev)->pci_device == 0x2A02 ||		\
+			 (dev)->pci_device == 0x2A12 ||		\
+			 (dev)->pci_device == 0x2E02 ||		\
+			 (dev)->pci_device == 0x2E12 ||		\
+			 (dev)->pci_device == 0x2E22 ||		\
+			 (dev)->pci_device == 0x2E32 ||		\
+			 (dev)->pci_device == 0x2A42 ||		\
+			 (dev)->pci_device == 0x2E42)
+
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
+#define IS_GEN6(dev)	((dev)->pci_device == 0x0102)
+
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
@@ -1067,6 +1129,9 @@
 #define I915_HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
 #define I915_HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6)
 
+#define HAS_PCH_SPLIT(dev) (IS_IRONLAKE(dev) ||	\
+			    IS_GEN6(dev))
+
 #define PRIMARY_RINGBUFFER_SIZE         (128*1024)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ec8a0d7..fba37e9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -128,9 +128,7 @@
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file_priv, obj, &handle);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_handle_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(obj);
 
 	if (ret)
 		return ret;
@@ -488,7 +486,7 @@
 	 */
 	if (args->offset > obj->size || args->size > obj->size ||
 	    args->offset + args->size > obj->size) {
-		drm_gem_object_unreference(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return -EINVAL;
 	}
 
@@ -501,7 +499,7 @@
 							file_priv);
 	}
 
-	drm_gem_object_unreference(obj);
+	drm_gem_object_unreference_unlocked(obj);
 
 	return ret;
 }
@@ -961,7 +959,7 @@
 	 */
 	if (args->offset > obj->size || args->size > obj->size ||
 	    args->offset + args->size > obj->size) {
-		drm_gem_object_unreference(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return -EINVAL;
 	}
 
@@ -995,7 +993,7 @@
 		DRM_INFO("pwrite failed %d\n", ret);
 #endif
 
-	drm_gem_object_unreference(obj);
+	drm_gem_object_unreference_unlocked(obj);
 
 	return ret;
 }
@@ -1138,9 +1136,7 @@
 		       PROT_READ | PROT_WRITE, MAP_SHARED,
 		       args->offset);
 	up_write(&current->mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	if (IS_ERR((void *)addr))
 		return addr;
 
@@ -1562,6 +1558,38 @@
 	i915_verify_inactive(dev, __FILE__, __LINE__);
 }
 
+static void
+i915_gem_process_flushing_list(struct drm_device *dev,
+			       uint32_t flush_domains, uint32_t seqno)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv, *next;
+
+	list_for_each_entry_safe(obj_priv, next,
+				 &dev_priv->mm.gpu_write_list,
+				 gpu_write_list) {
+		struct drm_gem_object *obj = obj_priv->obj;
+
+		if ((obj->write_domain & flush_domains) ==
+		    obj->write_domain) {
+			uint32_t old_write_domain = obj->write_domain;
+
+			obj->write_domain = 0;
+			list_del_init(&obj_priv->gpu_write_list);
+			i915_gem_object_move_to_active(obj, seqno);
+
+			/* update the fence lru list */
+			if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+				list_move_tail(&obj_priv->fence_list,
+						&dev_priv->mm.fence_list);
+
+			trace_i915_gem_object_change_domain(obj,
+							    obj->read_domains,
+							    old_write_domain);
+		}
+	}
+}
+
 /**
  * Creates a new sequence number, emitting a write of it to the status page
  * plus an interrupt, which will trigger i915_user_interrupt_handler.
@@ -1620,29 +1648,8 @@
 	/* Associate any objects on the flushing list matching the write
 	 * domain we're flushing with our flush.
 	 */
-	if (flush_domains != 0) {
-		struct drm_i915_gem_object *obj_priv, *next;
-
-		list_for_each_entry_safe(obj_priv, next,
-					 &dev_priv->mm.gpu_write_list,
-					 gpu_write_list) {
-			struct drm_gem_object *obj = obj_priv->obj;
-
-			if ((obj->write_domain & flush_domains) ==
-			    obj->write_domain) {
-				uint32_t old_write_domain = obj->write_domain;
-
-				obj->write_domain = 0;
-				list_del_init(&obj_priv->gpu_write_list);
-				i915_gem_object_move_to_active(obj, seqno);
-
-				trace_i915_gem_object_change_domain(obj,
-								    obj->read_domains,
-								    old_write_domain);
-			}
-		}
-
-	}
+	if (flush_domains != 0) 
+		i915_gem_process_flushing_list(dev, flush_domains, seqno);
 
 	if (!dev_priv->mm.suspended) {
 		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
@@ -1822,7 +1829,7 @@
 		return -EIO;
 
 	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			ier = I915_READ(DEIER) | I915_READ(GTIER);
 		else
 			ier = I915_READ(IER);
@@ -1991,6 +1998,7 @@
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int ret = 0;
 
@@ -2046,8 +2054,10 @@
 	}
 
 	/* Remove ourselves from the LRU list if present. */
+	spin_lock(&dev_priv->mm.active_list_lock);
 	if (!list_empty(&obj_priv->list))
 		list_del_init(&obj_priv->list);
+	spin_unlock(&dev_priv->mm.active_list_lock);
 
 	if (i915_gem_object_is_purgeable(obj_priv))
 		i915_gem_object_truncate(obj);
@@ -2085,11 +2095,34 @@
 }
 
 static int
+i915_gpu_idle(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	bool lists_empty;
+	uint32_t seqno;
+
+	spin_lock(&dev_priv->mm.active_list_lock);
+	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
+		      list_empty(&dev_priv->mm.active_list);
+	spin_unlock(&dev_priv->mm.active_list_lock);
+
+	if (lists_empty)
+		return 0;
+
+	/* Flush everything onto the inactive list. */
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+	if (seqno == 0)
+		return -ENOMEM;
+
+	return i915_wait_request(dev, seqno);
+}
+
+static int
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
-	uint32_t seqno;
 	bool lists_empty;
 
 	spin_lock(&dev_priv->mm.active_list_lock);
@@ -2102,12 +2135,7 @@
 		return -ENOSPC;
 
 	/* Flush everything (on to the inactive lists) and evict */
-	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
-	if (seqno == 0)
-		return -ENOMEM;
-
-	ret = i915_wait_request(dev, seqno);
+	ret = i915_gpu_idle(dev);
 	if (ret)
 		return ret;
 
@@ -2265,6 +2293,28 @@
 	return 0;
 }
 
+static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
+{
+	struct drm_gem_object *obj = reg->obj;
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int regnum = obj_priv->fence_reg;
+	uint64_t val;
+
+	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+		    0xfffff000) << 32;
+	val |= obj_priv->gtt_offset & 0xfffff000;
+	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
+		SANDYBRIDGE_FENCE_PITCH_SHIFT;
+
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
+	val |= I965_FENCE_REG_VALID;
+
+	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
+}
+
 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
 {
 	struct drm_gem_object *obj = reg->obj;
@@ -2361,6 +2411,58 @@
 	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
 }
 
+static int i915_find_fence_reg(struct drm_device *dev)
+{
+	struct drm_i915_fence_reg *reg = NULL;
+	struct drm_i915_gem_object *obj_priv = NULL;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj = NULL;
+	int i, avail, ret;
+
+	/* First try to find a free reg */
+	avail = 0;
+	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
+		reg = &dev_priv->fence_regs[i];
+		if (!reg->obj)
+			return i;
+
+		obj_priv = reg->obj->driver_private;
+		if (!obj_priv->pin_count)
+		    avail++;
+	}
+
+	if (avail == 0)
+		return -ENOSPC;
+
+	/* None available, try to steal one or wait for a user to finish */
+	i = I915_FENCE_REG_NONE;
+	list_for_each_entry(obj_priv, &dev_priv->mm.fence_list,
+			    fence_list) {
+		obj = obj_priv->obj;
+
+		if (obj_priv->pin_count)
+			continue;
+
+		/* found one! */
+		i = obj_priv->fence_reg;
+		break;
+	}
+
+	BUG_ON(i == I915_FENCE_REG_NONE);
+
+	/* We only have a reference on obj from the active list. put_fence_reg
+	 * might drop that one, causing a use-after-free in it. So hold a
+	 * private reference to obj like the other callers of put_fence_reg
+	 * (set_tiling ioctl) do. */
+	drm_gem_object_reference(obj);
+	ret = i915_gem_object_put_fence_reg(obj);
+	drm_gem_object_unreference(obj);
+	if (ret != 0)
+		return ret;
+
+	return i;
+}
+
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
@@ -2381,8 +2483,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	struct drm_i915_fence_reg *reg = NULL;
-	struct drm_i915_gem_object *old_obj_priv = NULL;
-	int i, ret, avail;
+	int ret;
 
 	/* Just update our place in the LRU if our fence is getting used. */
 	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
@@ -2410,86 +2511,27 @@
 		break;
 	}
 
-	/* First try to find a free reg */
-	avail = 0;
-	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
-		reg = &dev_priv->fence_regs[i];
-		if (!reg->obj)
-			break;
+	ret = i915_find_fence_reg(dev);
+	if (ret < 0)
+		return ret;
 
-		old_obj_priv = reg->obj->driver_private;
-		if (!old_obj_priv->pin_count)
-		    avail++;
-	}
-
-	/* None available, try to steal one or wait for a user to finish */
-	if (i == dev_priv->num_fence_regs) {
-		struct drm_gem_object *old_obj = NULL;
-
-		if (avail == 0)
-			return -ENOSPC;
-
-		list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
-				    fence_list) {
-			old_obj = old_obj_priv->obj;
-
-			if (old_obj_priv->pin_count)
-				continue;
-
-			/* Take a reference, as otherwise the wait_rendering
-			 * below may cause the object to get freed out from
-			 * under us.
-			 */
-			drm_gem_object_reference(old_obj);
-
-			/* i915 uses fences for GPU access to tiled buffers */
-			if (IS_I965G(dev) || !old_obj_priv->active)
-				break;
-
-			/* This brings the object to the head of the LRU if it
-			 * had been written to.  The only way this should
-			 * result in us waiting longer than the expected
-			 * optimal amount of time is if there was a
-			 * fence-using buffer later that was read-only.
-			 */
-			i915_gem_object_flush_gpu_write_domain(old_obj);
-			ret = i915_gem_object_wait_rendering(old_obj);
-			if (ret != 0) {
-				drm_gem_object_unreference(old_obj);
-				return ret;
-			}
-
-			break;
-		}
-
-		/*
-		 * Zap this virtual mapping so we can set up a fence again
-		 * for this object next time we need it.
-		 */
-		i915_gem_release_mmap(old_obj);
-
-		i = old_obj_priv->fence_reg;
-		reg = &dev_priv->fence_regs[i];
-
-		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
-		list_del_init(&old_obj_priv->fence_list);
-
-		drm_gem_object_unreference(old_obj);
-	}
-
-	obj_priv->fence_reg = i;
+	obj_priv->fence_reg = ret;
+	reg = &dev_priv->fence_regs[obj_priv->fence_reg];
 	list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
 
 	reg->obj = obj;
 
-	if (IS_I965G(dev))
+	if (IS_GEN6(dev))
+		sandybridge_write_fence_reg(reg);
+	else if (IS_I965G(dev))
 		i965_write_fence_reg(reg);
 	else if (IS_I9XX(dev))
 		i915_write_fence_reg(reg);
 	else
 		i830_write_fence_reg(reg);
 
-	trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
+			obj_priv->tiling_mode);
 
 	return 0;
 }
@@ -2508,9 +2550,12 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
-	if (IS_I965G(dev))
+	if (IS_GEN6(dev)) {
+		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
+			     (obj_priv->fence_reg * 8), 0);
+	} else if (IS_I965G(dev)) {
 		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
-	else {
+	} else {
 		uint32_t fence_reg;
 
 		if (obj_priv->fence_reg < 8)
@@ -2544,6 +2589,12 @@
 	if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
 		return 0;
 
+	/* If we've changed tiling, GTT-mappings of the object
+	 * need to re-fault to ensure that the correct fence register
+	 * setup is in place.
+	 */
+	i915_gem_release_mmap(obj);
+
 	/* On the i915, GPU access to tiled buffers is via a fence,
 	 * therefore we must wait for any outstanding access to complete
 	 * before clearing the fence.
@@ -2552,12 +2603,12 @@
 		int ret;
 
 		i915_gem_object_flush_gpu_write_domain(obj);
-		i915_gem_object_flush_gtt_write_domain(obj);
 		ret = i915_gem_object_wait_rendering(obj);
 		if (ret != 0)
 			return ret;
 	}
 
+	i915_gem_object_flush_gtt_write_domain(obj);
 	i915_gem_clear_fence_reg (obj);
 
 	return 0;
@@ -2697,7 +2748,6 @@
 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	uint32_t seqno;
 	uint32_t old_write_domain;
 
 	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
@@ -2706,9 +2756,8 @@
 	/* Queue the GPU write cache flushing we need. */
 	old_write_domain = obj->write_domain;
 	i915_gem_flush(dev, 0, obj->write_domain);
-	seqno = i915_add_request(dev, NULL, obj->write_domain);
+	(void) i915_add_request(dev, NULL, obj->write_domain);
 	BUG_ON(obj->write_domain);
-	i915_gem_object_move_to_active(obj, seqno);
 
 	trace_i915_gem_object_change_domain(obj,
 					    obj->read_domains,
@@ -3247,7 +3296,8 @@
 	             obj_priv->tiling_mode != I915_TILING_NONE;
 
 	/* Check fence reg constraints and rebind if necessary */
-	if (need_fence && !i915_obj_fenceable(dev, obj))
+	if (need_fence && !i915_gem_object_fence_offset_ok(obj,
+	    obj_priv->tiling_mode))
 		i915_gem_object_unbind(obj);
 
 	/* Choose the GTT offset for our buffer and put it there. */
@@ -3317,6 +3367,16 @@
 		}
 
 		/* Validate that the target is in a valid r/w GPU domain */
+		if (reloc->write_domain & (reloc->write_domain - 1)) {
+			DRM_ERROR("reloc with multiple write domains: "
+				  "obj %p target %d offset %d "
+				  "read %08x write %08x",
+				  obj, reloc->target_handle,
+				  (int) reloc->offset,
+				  reloc->read_domains,
+				  reloc->write_domain);
+			return -EINVAL;
+		}
 		if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
 		    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
 			DRM_ERROR("reloc with read/write CPU domains: "
@@ -4445,8 +4505,7 @@
 i915_gem_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t seqno, cur_seqno, last_seqno;
-	int stuck, ret;
+	int ret;
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -4455,115 +4514,36 @@
 		return 0;
 	}
 
-	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
-	 * We need to replace this with a semaphore, or something.
-	 */
-	dev_priv->mm.suspended = 1;
-	del_timer(&dev_priv->hangcheck_timer);
-
-	/* Cancel the retire work handler, wait for it to finish if running
-	 */
-	mutex_unlock(&dev->struct_mutex);
-	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
-	mutex_lock(&dev->struct_mutex);
-
-	i915_kernel_lost_context(dev);
-
-	/* Flush the GPU along with all non-CPU write domains
-	 */
-	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
-	seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
-
-	if (seqno == 0) {
-		mutex_unlock(&dev->struct_mutex);
-		return -ENOMEM;
-	}
-
-	dev_priv->mm.waiting_gem_seqno = seqno;
-	last_seqno = 0;
-	stuck = 0;
-	for (;;) {
-		cur_seqno = i915_get_gem_seqno(dev);
-		if (i915_seqno_passed(cur_seqno, seqno))
-			break;
-		if (last_seqno == cur_seqno) {
-			if (stuck++ > 100) {
-				DRM_ERROR("hardware wedged\n");
-				atomic_set(&dev_priv->mm.wedged, 1);
-				DRM_WAKEUP(&dev_priv->irq_queue);
-				break;
-			}
-		}
-		msleep(10);
-		last_seqno = cur_seqno;
-	}
-	dev_priv->mm.waiting_gem_seqno = 0;
-
-	i915_gem_retire_requests(dev);
-
-	spin_lock(&dev_priv->mm.active_list_lock);
-	if (!atomic_read(&dev_priv->mm.wedged)) {
-		/* Active and flushing should now be empty as we've
-		 * waited for a sequence higher than any pending execbuffer
-		 */
-		WARN_ON(!list_empty(&dev_priv->mm.active_list));
-		WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
-		/* Request should now be empty as we've also waited
-		 * for the last request in the list
-		 */
-		WARN_ON(!list_empty(&dev_priv->mm.request_list));
-	}
-
-	/* Empty the active and flushing lists to inactive.  If there's
-	 * anything left at this point, it means that we're wedged and
-	 * nothing good's going to happen by leaving them there.  So strip
-	 * the GPU domains and just stuff them onto inactive.
-	 */
-	while (!list_empty(&dev_priv->mm.active_list)) {
-		struct drm_gem_object *obj;
-		uint32_t old_write_domain;
-
-		obj = list_first_entry(&dev_priv->mm.active_list,
-				       struct drm_i915_gem_object,
-				       list)->obj;
-		old_write_domain = obj->write_domain;
-		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj);
-
-		trace_i915_gem_object_change_domain(obj,
-						    obj->read_domains,
-						    old_write_domain);
-	}
-	spin_unlock(&dev_priv->mm.active_list_lock);
-
-	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		struct drm_gem_object *obj;
-		uint32_t old_write_domain;
-
-		obj = list_first_entry(&dev_priv->mm.flushing_list,
-				       struct drm_i915_gem_object,
-				       list)->obj;
-		old_write_domain = obj->write_domain;
-		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj);
-
-		trace_i915_gem_object_change_domain(obj,
-						    obj->read_domains,
-						    old_write_domain);
-	}
-
-
-	/* Move all inactive buffers out of the GTT. */
-	ret = i915_gem_evict_from_inactive_list(dev);
-	WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
+	ret = i915_gpu_idle(dev);
 	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
+	/* Under UMS, be paranoid and evict. */
+	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = i915_gem_evict_from_inactive_list(dev);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+	}
+
+	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
+	 * We need to replace this with a semaphore, or something.
+	 * And not confound mm.suspended!
+	 */
+	dev_priv->mm.suspended = 1;
+	del_timer(&dev_priv->hangcheck_timer);
+
+	i915_kernel_lost_context(dev);
 	i915_gem_cleanup_ringbuffer(dev);
+
 	mutex_unlock(&dev->struct_mutex);
 
+	/* Cancel the retire work handler, which should be idle now. */
+	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
+
 	return 0;
 }
 
@@ -4607,8 +4587,13 @@
 	}
 	dev_priv->hws_obj = obj;
 	memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
-	I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
-	I915_READ(HWS_PGA); /* posting read */
+	if (IS_GEN6(dev)) {
+		I915_WRITE(HWS_PGA_GEN6, dev_priv->status_gfx_addr);
+		I915_READ(HWS_PGA_GEN6); /* posting read */
+	} else {
+		I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
+		I915_READ(HWS_PGA); /* posting read */
+	}
 	DRM_DEBUG_DRIVER("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
 
 	return 0;
@@ -4850,7 +4835,8 @@
 	spin_unlock(&shrink_list_lock);
 
 	/* Old X drivers will take 0-2 for front, back, depth buffers */
-	dev_priv->fence_reg_start = 3;
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		dev_priv->fence_reg_start = 3;
 
 	if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index df278b2..b5c55d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -25,8 +25,6 @@
  *
  */
 
-#include <linux/acpi.h>
-#include <linux/pnp.h>
 #include "linux/string.h"
 #include "linux/bitops.h"
 #include "drmP.h"
@@ -83,120 +81,6 @@
  * to match what the GPU expects.
  */
 
-#define MCHBAR_I915 0x44
-#define MCHBAR_I965 0x48
-#define MCHBAR_SIZE (4*4096)
-
-#define DEVEN_REG 0x54
-#define   DEVEN_MCHBAR_EN (1 << 28)
-
-/* Allocate space for the MCH regs if needed, return nonzero on error */
-static int
-intel_alloc_mchbar_resource(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	int reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
-	u32 temp_lo, temp_hi = 0;
-	u64 mchbar_addr;
-	int ret = 0;
-
-	if (IS_I965G(dev))
-		pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi);
-	pci_read_config_dword(dev_priv->bridge_dev, reg, &temp_lo);
-	mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
-
-	/* If ACPI doesn't have it, assume we need to allocate it ourselves */
-#ifdef CONFIG_PNP
-	if (mchbar_addr &&
-	    pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE)) {
-		ret = 0;
-		goto out;
-	}
-#endif
-
-	/* Get some space for it */
-	ret = pci_bus_alloc_resource(dev_priv->bridge_dev->bus, &dev_priv->mch_res,
-				     MCHBAR_SIZE, MCHBAR_SIZE,
-				     PCIBIOS_MIN_MEM,
-				     0,   pcibios_align_resource,
-				     dev_priv->bridge_dev);
-	if (ret) {
-		DRM_DEBUG_DRIVER("failed bus alloc: %d\n", ret);
-		dev_priv->mch_res.start = 0;
-		goto out;
-	}
-
-	if (IS_I965G(dev))
-		pci_write_config_dword(dev_priv->bridge_dev, reg + 4,
-				       upper_32_bits(dev_priv->mch_res.start));
-
-	pci_write_config_dword(dev_priv->bridge_dev, reg,
-			       lower_32_bits(dev_priv->mch_res.start));
-out:
-	return ret;
-}
-
-/* Setup MCHBAR if possible, return true if we should disable it again */
-static bool
-intel_setup_mchbar(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
-	u32 temp;
-	bool need_disable = false, enabled;
-
-	if (IS_I915G(dev) || IS_I915GM(dev)) {
-		pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
-		enabled = !!(temp & DEVEN_MCHBAR_EN);
-	} else {
-		pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
-		enabled = temp & 1;
-	}
-
-	/* If it's already enabled, don't have to do anything */
-	if (enabled)
-		goto out;
-
-	if (intel_alloc_mchbar_resource(dev))
-		goto out;
-
-	need_disable = true;
-
-	/* Space is allocated or reserved, so enable it. */
-	if (IS_I915G(dev) || IS_I915GM(dev)) {
-		pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG,
-				       temp | DEVEN_MCHBAR_EN);
-	} else {
-		pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
-		pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp | 1);
-	}
-out:
-	return need_disable;
-}
-
-static void
-intel_teardown_mchbar(struct drm_device *dev, bool disable)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	int mchbar_reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915;
-	u32 temp;
-
-	if (disable) {
-		if (IS_I915G(dev) || IS_I915GM(dev)) {
-			pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
-			temp &= ~DEVEN_MCHBAR_EN;
-			pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG, temp);
-		} else {
-			pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
-			temp &= ~1;
-			pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp);
-		}
-	}
-
-	if (dev_priv->mch_res.start)
-		release_resource(&dev_priv->mch_res);
-}
-
 /**
  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  * access through main memory.
@@ -207,9 +91,8 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-	bool need_disable;
 
-	if (IS_IRONLAKE(dev)) {
+	if (IS_IRONLAKE(dev) || IS_GEN6(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
 		 */
@@ -224,9 +107,6 @@
 	} else if (IS_MOBILE(dev)) {
 		uint32_t dcc;
 
-		/* Try to make sure MCHBAR is enabled before poking at it */
-		need_disable = intel_setup_mchbar(dev);
-
 		/* On mobile 9xx chipsets, channel interleave by the CPU is
 		 * determined by DCC.  For single-channel, neither the CPU
 		 * nor the GPU do swizzling.  For dual channel interleaved,
@@ -266,8 +146,6 @@
 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 		}
-
-		intel_teardown_mchbar(dev, need_disable);
 	} else {
 		/* The 965, G33, and newer, have a very flexible memory
 		 * configuration.  It will enable dual-channel mode
@@ -302,39 +180,6 @@
 	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 }
 
-
-/**
- * Returns whether an object is currently fenceable.  If not, it may need
- * to be unbound and have its pitch adjusted.
- */
-bool
-i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj)
-{
-	struct drm_i915_gem_object *obj_priv = obj->driver_private;
-
-	if (IS_I965G(dev)) {
-		/* The 965 can have fences at any page boundary. */
-		if (obj->size & 4095)
-			return false;
-		return true;
-	} else if (IS_I9XX(dev)) {
-		if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK)
-			return false;
-	} else {
-		if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK)
-			return false;
-	}
-
-	/* Power of two sized... */
-	if (obj->size & (obj->size - 1))
-		return false;
-
-	/* Objects must be size aligned as well */
-	if (obj_priv->gtt_offset & (obj->size - 1))
-		return false;
-	return true;
-}
-
 /* Check pitch constriants for all chips & tiling formats */
 bool
 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
@@ -391,7 +236,7 @@
 	return true;
 }
 
-static bool
+bool
 i915_gem_object_fence_offset_ok(struct drm_gem_object *obj, int tiling_mode)
 {
 	struct drm_device *dev = obj->dev;
@@ -438,9 +283,7 @@
 	obj_priv = obj->driver_private;
 
 	if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode)) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(obj);
-		mutex_unlock(&dev->struct_mutex);
+		drm_gem_object_unreference_unlocked(obj);
 		return -EINVAL;
 	}
 
@@ -493,12 +336,6 @@
 			goto err;
 		}
 
-		/* If we've changed tiling, GTT-mappings of the object
-		 * need to re-fault to ensure that the correct fence register
-		 * setup is in place.
-		 */
-		i915_gem_release_mmap(obj);
-
 		obj_priv->tiling_mode = args->tiling_mode;
 		obj_priv->stride = args->stride;
 	}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a17d6bd..5388354 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -166,7 +166,7 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		ironlake_enable_display_irq(dev_priv, DE_GSE);
 	else
 		i915_enable_pipestat(dev_priv, 1,
@@ -269,6 +269,57 @@
 	drm_sysfs_hotplug_event(dev);
 }
 
+static void i915_handle_rps_change(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	u32 busy_up, busy_down, max_avg, min_avg;
+	u16 rgvswctl;
+	u8 new_delay = dev_priv->cur_delay;
+
+	I915_WRITE(MEMINTRSTS, I915_READ(MEMINTRSTS) & ~MEMINT_EVAL_CHG);
+	busy_up = I915_READ(RCPREVBSYTUPAVG);
+	busy_down = I915_READ(RCPREVBSYTDNAVG);
+	max_avg = I915_READ(RCBMAXAVG);
+	min_avg = I915_READ(RCBMINAVG);
+
+	/* Handle RCS change request from hw */
+	if (busy_up > max_avg) {
+		if (dev_priv->cur_delay != dev_priv->max_delay)
+			new_delay = dev_priv->cur_delay - 1;
+		if (new_delay < dev_priv->max_delay)
+			new_delay = dev_priv->max_delay;
+	} else if (busy_down < min_avg) {
+		if (dev_priv->cur_delay != dev_priv->min_delay)
+			new_delay = dev_priv->cur_delay + 1;
+		if (new_delay > dev_priv->min_delay)
+			new_delay = dev_priv->min_delay;
+	}
+
+	DRM_DEBUG("rps change requested: %d -> %d\n",
+		  dev_priv->cur_delay, new_delay);
+
+	rgvswctl = I915_READ(MEMSWCTL);
+	if (rgvswctl & MEMCTL_CMD_STS) {
+		DRM_ERROR("gpu busy, RCS change rejected\n");
+		return; /* still busy with another command */
+	}
+
+	/* Program the new state */
+	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(new_delay << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	POSTING_READ(MEMSWCTL);
+
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+
+	dev_priv->cur_delay = new_delay;
+
+	DRM_DEBUG("rps changed\n");
+
+	return;
+}
+
 irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -331,6 +382,11 @@
 		queue_work(dev_priv->wq, &dev_priv->hotplug_work);
 	}
 
+	if (de_iir & DE_PCU_EVENT) {
+		I915_WRITE(MEMINTRSTS, I915_READ(MEMINTRSTS));
+		i915_handle_rps_change(dev);
+	}
+
 	/* should clear PCH hotplug event before clear CPU irq */
 	I915_WRITE(SDEIIR, pch_iir);
 	I915_WRITE(GTIIR, gt_iir);
@@ -376,6 +432,121 @@
 	}
 }
 
+static struct drm_i915_error_object *
+i915_error_object_create(struct drm_device *dev,
+			 struct drm_gem_object *src)
+{
+	struct drm_i915_error_object *dst;
+	struct drm_i915_gem_object *src_priv;
+	int page, page_count;
+
+	if (src == NULL)
+		return NULL;
+
+	src_priv = src->driver_private;
+	if (src_priv->pages == NULL)
+		return NULL;
+
+	page_count = src->size / PAGE_SIZE;
+
+	dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
+	if (dst == NULL)
+		return NULL;
+
+	for (page = 0; page < page_count; page++) {
+		void *s, *d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+		if (d == NULL)
+			goto unwind;
+		s = kmap_atomic(src_priv->pages[page], KM_USER0);
+		memcpy(d, s, PAGE_SIZE);
+		kunmap_atomic(s, KM_USER0);
+		dst->pages[page] = d;
+	}
+	dst->page_count = page_count;
+	dst->gtt_offset = src_priv->gtt_offset;
+
+	return dst;
+
+unwind:
+	while (page--)
+		kfree(dst->pages[page]);
+	kfree(dst);
+	return NULL;
+}
+
+static void
+i915_error_object_free(struct drm_i915_error_object *obj)
+{
+	int page;
+
+	if (obj == NULL)
+		return;
+
+	for (page = 0; page < obj->page_count; page++)
+		kfree(obj->pages[page]);
+
+	kfree(obj);
+}
+
+static void
+i915_error_state_free(struct drm_device *dev,
+		      struct drm_i915_error_state *error)
+{
+	i915_error_object_free(error->batchbuffer[0]);
+	i915_error_object_free(error->batchbuffer[1]);
+	i915_error_object_free(error->ringbuffer);
+	kfree(error->active_bo);
+	kfree(error);
+}
+
+static u32
+i915_get_bbaddr(struct drm_device *dev, u32 *ring)
+{
+	u32 cmd;
+
+	if (IS_I830(dev) || IS_845G(dev))
+		cmd = MI_BATCH_BUFFER;
+	else if (IS_I965G(dev))
+		cmd = (MI_BATCH_BUFFER_START | (2 << 6) |
+		       MI_BATCH_NON_SECURE_I965);
+	else
+		cmd = (MI_BATCH_BUFFER_START | (2 << 6));
+
+	return ring[0] == cmd ? ring[1] : 0;
+}
+
+static u32
+i915_ringbuffer_last_batch(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 head, bbaddr;
+	u32 *ring;
+
+	/* Locate the current position in the ringbuffer and walk back
+	 * to find the most recently dispatched batch buffer.
+	 */
+	bbaddr = 0;
+	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+	ring = (u32 *)(dev_priv->ring.virtual_start + head);
+
+	while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+		bbaddr = i915_get_bbaddr(dev, ring);
+		if (bbaddr)
+			break;
+	}
+
+	if (bbaddr == 0) {
+		ring = (u32 *)(dev_priv->ring.virtual_start + dev_priv->ring.Size);
+		while (--ring >= (u32 *)dev_priv->ring.virtual_start) {
+			bbaddr = i915_get_bbaddr(dev, ring);
+			if (bbaddr)
+				break;
+		}
+	}
+
+	return bbaddr;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -388,19 +559,26 @@
 static void i915_capture_error_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv;
 	struct drm_i915_error_state *error;
+	struct drm_gem_object *batchbuffer[2];
 	unsigned long flags;
+	u32 bbaddr;
+	int count;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
-	if (dev_priv->first_error)
-		goto out;
+	error = dev_priv->first_error;
+	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+	if (error)
+		return;
 
 	error = kmalloc(sizeof(*error), GFP_ATOMIC);
 	if (!error) {
-		DRM_DEBUG_DRIVER("out ot memory, not capturing error state\n");
-		goto out;
+		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+		return;
 	}
 
+	error->seqno = i915_get_gem_seqno(dev);
 	error->eir = I915_READ(EIR);
 	error->pgtbl_er = I915_READ(PGTBL_ER);
 	error->pipeastat = I915_READ(PIPEASTAT);
@@ -411,6 +589,7 @@
 		error->ipehr = I915_READ(IPEHR);
 		error->instdone = I915_READ(INSTDONE);
 		error->acthd = I915_READ(ACTHD);
+		error->bbaddr = 0;
 	} else {
 		error->ipeir = I915_READ(IPEIR_I965);
 		error->ipehr = I915_READ(IPEHR_I965);
@@ -418,14 +597,101 @@
 		error->instps = I915_READ(INSTPS);
 		error->instdone1 = I915_READ(INSTDONE1);
 		error->acthd = I915_READ(ACTHD_I965);
+		error->bbaddr = I915_READ64(BB_ADDR);
+	}
+
+	bbaddr = i915_ringbuffer_last_batch(dev);
+
+	/* Grab the current batchbuffer, most likely to have crashed. */
+	batchbuffer[0] = NULL;
+	batchbuffer[1] = NULL;
+	count = 0;
+	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+		struct drm_gem_object *obj = obj_priv->obj;
+
+		if (batchbuffer[0] == NULL &&
+		    bbaddr >= obj_priv->gtt_offset &&
+		    bbaddr < obj_priv->gtt_offset + obj->size)
+			batchbuffer[0] = obj;
+
+		if (batchbuffer[1] == NULL &&
+		    error->acthd >= obj_priv->gtt_offset &&
+		    error->acthd < obj_priv->gtt_offset + obj->size &&
+		    batchbuffer[0] != obj)
+			batchbuffer[1] = obj;
+
+		count++;
+	}
+
+	/* We need to copy these to an anonymous buffer as the simplest
+	 * method to avoid being overwritten by userpace.
+	 */
+	error->batchbuffer[0] = i915_error_object_create(dev, batchbuffer[0]);
+	error->batchbuffer[1] = i915_error_object_create(dev, batchbuffer[1]);
+
+	/* Record the ringbuffer */
+	error->ringbuffer = i915_error_object_create(dev, dev_priv->ring.ring_obj);
+
+	/* Record buffers on the active list. */
+	error->active_bo = NULL;
+	error->active_bo_count = 0;
+
+	if (count)
+		error->active_bo = kmalloc(sizeof(*error->active_bo)*count,
+					   GFP_ATOMIC);
+
+	if (error->active_bo) {
+		int i = 0;
+		list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+			struct drm_gem_object *obj = obj_priv->obj;
+
+			error->active_bo[i].size = obj->size;
+			error->active_bo[i].name = obj->name;
+			error->active_bo[i].seqno = obj_priv->last_rendering_seqno;
+			error->active_bo[i].gtt_offset = obj_priv->gtt_offset;
+			error->active_bo[i].read_domains = obj->read_domains;
+			error->active_bo[i].write_domain = obj->write_domain;
+			error->active_bo[i].fence_reg = obj_priv->fence_reg;
+			error->active_bo[i].pinned = 0;
+			if (obj_priv->pin_count > 0)
+				error->active_bo[i].pinned = 1;
+			if (obj_priv->user_pin_count > 0)
+				error->active_bo[i].pinned = -1;
+			error->active_bo[i].tiling = obj_priv->tiling_mode;
+			error->active_bo[i].dirty = obj_priv->dirty;
+			error->active_bo[i].purgeable = obj_priv->madv != I915_MADV_WILLNEED;
+
+			if (++i == count)
+				break;
+		}
+		error->active_bo_count = i;
 	}
 
 	do_gettimeofday(&error->time);
 
-	dev_priv->first_error = error;
-
-out:
+	spin_lock_irqsave(&dev_priv->error_lock, flags);
+	if (dev_priv->first_error == NULL) {
+		dev_priv->first_error = error;
+		error = NULL;
+	}
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
+
+	if (error)
+		i915_error_state_free(dev, error);
+}
+
+void i915_destroy_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *error;
+
+	spin_lock(&dev_priv->error_lock);
+	error = dev_priv->first_error;
+	dev_priv->first_error = NULL;
+	spin_unlock(&dev_priv->error_lock);
+
+	if (error)
+		i915_error_state_free(dev, error);
 }
 
 /**
@@ -576,7 +842,7 @@
 
 	atomic_inc(&dev_priv->irq_received);
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return ironlake_irq_handler(dev);
 
 	iir = I915_READ(IIR);
@@ -737,7 +1003,7 @@
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) {
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			ironlake_enable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
 		else
 			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
@@ -753,7 +1019,7 @@
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0);
 	if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) {
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			ironlake_disable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
 		else
 			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
@@ -861,7 +1127,7 @@
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		ironlake_enable_display_irq(dev_priv, (pipe == 0) ? 
 					    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
 	else if (IS_I965G(dev))
@@ -883,7 +1149,7 @@
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		ironlake_disable_display_irq(dev_priv, (pipe == 0) ? 
 					     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
 	else
@@ -897,7 +1163,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (!IS_IRONLAKE(dev))
+	if (!HAS_PCH_SPLIT(dev))
 		opregion_enable_asle(dev);
 	dev_priv->irq_enabled = 1;
 }
@@ -973,7 +1239,11 @@
 	struct drm_device *dev = (struct drm_device *)data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t acthd;
-       
+
+	/* No reset support on this chip yet. */
+	if (IS_GEN6(dev))
+		return;
+
 	if (!IS_I965G(dev))
 		acthd = I915_READ(ACTHD);
 	else
@@ -1064,6 +1334,13 @@
 	I915_WRITE(SDEIER, dev_priv->pch_irq_enable_reg);
 	(void) I915_READ(SDEIER);
 
+	if (IS_IRONLAKE_M(dev)) {
+		/* Clear & enable PCU event interrupts */
+		I915_WRITE(DEIIR, DE_PCU_EVENT);
+		I915_WRITE(DEIER, I915_READ(DEIER) | DE_PCU_EVENT);
+		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
+	}
+
 	return 0;
 }
 
@@ -1076,7 +1353,7 @@
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->error_work, i915_error_work_func);
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		ironlake_irq_preinstall(dev);
 		return;
 	}
@@ -1108,7 +1385,7 @@
 
 	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return ironlake_irq_postinstall(dev);
 
 	/* Unmask the interrupts that we always want on. */
@@ -1196,7 +1473,7 @@
 
 	dev_priv->vblank_pipe = 0;
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		ironlake_irq_uninstall(dev);
 		return;
 	}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ab1bd2d..3d59862 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -53,6 +53,25 @@
 #define INTEL_GMCH_GMS_STOLEN_224M	(0xc << 4)
 #define INTEL_GMCH_GMS_STOLEN_352M	(0xd << 4)
 
+#define SNB_GMCH_CTRL	0x50
+#define SNB_GMCH_GMS_STOLEN_MASK	0xF8
+#define SNB_GMCH_GMS_STOLEN_32M		(1 << 3)
+#define SNB_GMCH_GMS_STOLEN_64M		(2 << 3)
+#define SNB_GMCH_GMS_STOLEN_96M		(3 << 3)
+#define SNB_GMCH_GMS_STOLEN_128M	(4 << 3)
+#define SNB_GMCH_GMS_STOLEN_160M	(5 << 3)
+#define SNB_GMCH_GMS_STOLEN_192M	(6 << 3)
+#define SNB_GMCH_GMS_STOLEN_224M	(7 << 3)
+#define SNB_GMCH_GMS_STOLEN_256M	(8 << 3)
+#define SNB_GMCH_GMS_STOLEN_288M	(9 << 3)
+#define SNB_GMCH_GMS_STOLEN_320M	(0xa << 3)
+#define SNB_GMCH_GMS_STOLEN_352M	(0xb << 3)
+#define SNB_GMCH_GMS_STOLEN_384M	(0xc << 3)
+#define SNB_GMCH_GMS_STOLEN_416M	(0xd << 3)
+#define SNB_GMCH_GMS_STOLEN_448M	(0xe << 3)
+#define SNB_GMCH_GMS_STOLEN_480M	(0xf << 3)
+#define SNB_GMCH_GMS_STOLEN_512M	(0x10 << 3)
+
 /* PCI config space */
 
 #define HPLLCC	0xc0 /* 855 only */
@@ -61,6 +80,7 @@
 #define   GC_CLOCK_100_200		(1 << 0)
 #define   GC_CLOCK_100_133		(2 << 0)
 #define   GC_CLOCK_166_250		(3 << 0)
+#define GCFGC2	0xda
 #define GCFGC	0xf0 /* 915+ only */
 #define   GC_LOW_FREQUENCY_ENABLE	(1 << 7)
 #define   GC_DISPLAY_CLOCK_190_200_MHZ	(0 << 4)
@@ -234,6 +254,9 @@
 #define   I965_FENCE_REG_VALID		(1<<0)
 #define   I965_FENCE_MAX_PITCH_VAL	0x0400
 
+#define FENCE_REG_SANDYBRIDGE_0		0x100000
+#define   SANDYBRIDGE_FENCE_PITCH_SHIFT	32
+
 /*
  * Instruction and interrupt control regs
  */
@@ -265,6 +288,7 @@
 #define INSTDONE1	0x0207c /* 965+ only */
 #define ACTHD_I965	0x02074
 #define HWS_PGA		0x02080
+#define HWS_PGA_GEN6	0x04080
 #define HWS_ADDRESS_MASK	0xfffff000
 #define HWS_START_ADDRESS_SHIFT	4
 #define PWRCTXA		0x2088 /* 965GM+ only */
@@ -282,7 +306,7 @@
 #define   I915_PIPE_CONTROL_NOTIFY_INTERRUPT		(1<<18)
 #define   I915_DISPLAY_PORT_INTERRUPT			(1<<17)
 #define   I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT	(1<<15)
-#define   I915_GMCH_THERMAL_SENSOR_EVENT_INTERRUPT	(1<<14)
+#define   I915_GMCH_THERMAL_SENSOR_EVENT_INTERRUPT	(1<<14) /* p-state */
 #define   I915_HWB_OOM_INTERRUPT			(1<<13)
 #define   I915_SYNC_STATUS_INTERRUPT			(1<<12)
 #define   I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT	(1<<11)
@@ -306,11 +330,14 @@
 #define   I915_ERROR_MEMORY_REFRESH			(1<<1)
 #define   I915_ERROR_INSTRUCTION			(1<<0)
 #define INSTPM	        0x020c0
+#define   INSTPM_SELF_EN (1<<12) /* 915GM only */
 #define ACTHD	        0x020c8
 #define FW_BLC		0x020d8
 #define FW_BLC2	 	0x020dc
 #define FW_BLC_SELF	0x020e0 /* 915+ only */
-#define   FW_BLC_SELF_EN (1<<15)
+#define   FW_BLC_SELF_EN_MASK      (1<<31)
+#define   FW_BLC_SELF_FIFO_MASK    (1<<16) /* 945 only */
+#define   FW_BLC_SELF_EN           (1<<15) /* 945 only */
 #define MM_BURST_LENGTH     0x00700000
 #define MM_FIFO_WATERMARK   0x0001F000
 #define LM_BURST_LENGTH     0x00000700
@@ -324,6 +351,7 @@
 #define   CM0_COLOR_EVICT_DISABLE (1<<3)
 #define   CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
+#define BB_ADDR		0x02140 /* 8 bytes */
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
 
 
@@ -784,10 +812,144 @@
 #define CLKCFG_MEM_800					(3 << 4)
 #define CLKCFG_MEM_MASK					(7 << 4)
 
-/** GM965 GM45 render standby register */
-#define MCHBAR_RENDER_STANDBY	0x111B8
+#define CRSTANDVID		0x11100
+#define PXVFREQ_BASE		0x11110 /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
+#define   PXVFREQ_PX_MASK	0x7f000000
+#define   PXVFREQ_PX_SHIFT	24
+#define VIDFREQ_BASE		0x11110
+#define VIDFREQ1		0x11110 /* VIDFREQ1-4 (0x1111c) (Cantiga) */
+#define VIDFREQ2		0x11114
+#define VIDFREQ3		0x11118
+#define VIDFREQ4		0x1111c
+#define   VIDFREQ_P0_MASK	0x1f000000
+#define   VIDFREQ_P0_SHIFT	24
+#define   VIDFREQ_P0_CSCLK_MASK	0x00f00000
+#define   VIDFREQ_P0_CSCLK_SHIFT 20
+#define   VIDFREQ_P0_CRCLK_MASK	0x000f0000
+#define   VIDFREQ_P0_CRCLK_SHIFT 16
+#define   VIDFREQ_P1_MASK	0x00001f00
+#define   VIDFREQ_P1_SHIFT	8
+#define   VIDFREQ_P1_CSCLK_MASK	0x000000f0
+#define   VIDFREQ_P1_CSCLK_SHIFT 4
+#define   VIDFREQ_P1_CRCLK_MASK	0x0000000f
+#define INTTOEXT_BASE_ILK	0x11300
+#define INTTOEXT_BASE		0x11120 /* INTTOEXT1-8 (0x1113c) */
+#define   INTTOEXT_MAP3_SHIFT	24
+#define   INTTOEXT_MAP3_MASK	(0x1f << INTTOEXT_MAP3_SHIFT)
+#define   INTTOEXT_MAP2_SHIFT	16
+#define   INTTOEXT_MAP2_MASK	(0x1f << INTTOEXT_MAP2_SHIFT)
+#define   INTTOEXT_MAP1_SHIFT	8
+#define   INTTOEXT_MAP1_MASK	(0x1f << INTTOEXT_MAP1_SHIFT)
+#define   INTTOEXT_MAP0_SHIFT	0
+#define   INTTOEXT_MAP0_MASK	(0x1f << INTTOEXT_MAP0_SHIFT)
+#define MEMSWCTL		0x11170 /* Ironlake only */
+#define   MEMCTL_CMD_MASK	0xe000
+#define   MEMCTL_CMD_SHIFT	13
+#define   MEMCTL_CMD_RCLK_OFF	0
+#define   MEMCTL_CMD_RCLK_ON	1
+#define   MEMCTL_CMD_CHFREQ	2
+#define   MEMCTL_CMD_CHVID	3
+#define   MEMCTL_CMD_VMMOFF	4
+#define   MEMCTL_CMD_VMMON	5
+#define   MEMCTL_CMD_STS	(1<<12) /* write 1 triggers command, clears
+					   when command complete */
+#define   MEMCTL_FREQ_MASK	0x0f00 /* jitter, from 0-15 */
+#define   MEMCTL_FREQ_SHIFT	8
+#define   MEMCTL_SFCAVM		(1<<7)
+#define   MEMCTL_TGT_VID_MASK	0x007f
+#define MEMIHYST		0x1117c
+#define MEMINTREN		0x11180 /* 16 bits */
+#define   MEMINT_RSEXIT_EN	(1<<8)
+#define   MEMINT_CX_SUPR_EN	(1<<7)
+#define   MEMINT_CONT_BUSY_EN	(1<<6)
+#define   MEMINT_AVG_BUSY_EN	(1<<5)
+#define   MEMINT_EVAL_CHG_EN	(1<<4)
+#define   MEMINT_MON_IDLE_EN	(1<<3)
+#define   MEMINT_UP_EVAL_EN	(1<<2)
+#define   MEMINT_DOWN_EVAL_EN	(1<<1)
+#define   MEMINT_SW_CMD_EN	(1<<0)
+#define MEMINTRSTR		0x11182 /* 16 bits */
+#define   MEM_RSEXIT_MASK	0xc000
+#define   MEM_RSEXIT_SHIFT	14
+#define   MEM_CONT_BUSY_MASK	0x3000
+#define   MEM_CONT_BUSY_SHIFT	12
+#define   MEM_AVG_BUSY_MASK	0x0c00
+#define   MEM_AVG_BUSY_SHIFT	10
+#define   MEM_EVAL_CHG_MASK	0x0300
+#define   MEM_EVAL_BUSY_SHIFT	8
+#define   MEM_MON_IDLE_MASK	0x00c0
+#define   MEM_MON_IDLE_SHIFT	6
+#define   MEM_UP_EVAL_MASK	0x0030
+#define   MEM_UP_EVAL_SHIFT	4
+#define   MEM_DOWN_EVAL_MASK	0x000c
+#define   MEM_DOWN_EVAL_SHIFT	2
+#define   MEM_SW_CMD_MASK	0x0003
+#define   MEM_INT_STEER_GFX	0
+#define   MEM_INT_STEER_CMR	1
+#define   MEM_INT_STEER_SMI	2
+#define   MEM_INT_STEER_SCI	3
+#define MEMINTRSTS		0x11184
+#define   MEMINT_RSEXIT		(1<<7)
+#define   MEMINT_CONT_BUSY	(1<<6)
+#define   MEMINT_AVG_BUSY	(1<<5)
+#define   MEMINT_EVAL_CHG	(1<<4)
+#define   MEMINT_MON_IDLE	(1<<3)
+#define   MEMINT_UP_EVAL	(1<<2)
+#define   MEMINT_DOWN_EVAL	(1<<1)
+#define   MEMINT_SW_CMD		(1<<0)
+#define MEMMODECTL		0x11190
+#define   MEMMODE_BOOST_EN	(1<<31)
+#define   MEMMODE_BOOST_FREQ_MASK 0x0f000000 /* jitter for boost, 0-15 */
+#define   MEMMODE_BOOST_FREQ_SHIFT 24
+#define   MEMMODE_IDLE_MODE_MASK 0x00030000
+#define   MEMMODE_IDLE_MODE_SHIFT 16
+#define   MEMMODE_IDLE_MODE_EVAL 0
+#define   MEMMODE_IDLE_MODE_CONT 1
+#define   MEMMODE_HWIDLE_EN	(1<<15)
+#define   MEMMODE_SWMODE_EN	(1<<14)
+#define   MEMMODE_RCLK_GATE	(1<<13)
+#define   MEMMODE_HW_UPDATE	(1<<12)
+#define   MEMMODE_FSTART_MASK	0x00000f00 /* starting jitter, 0-15 */
+#define   MEMMODE_FSTART_SHIFT	8
+#define   MEMMODE_FMAX_MASK	0x000000f0 /* max jitter, 0-15 */
+#define   MEMMODE_FMAX_SHIFT	4
+#define   MEMMODE_FMIN_MASK	0x0000000f /* min jitter, 0-15 */
+#define RCBMAXAVG		0x1119c
+#define MEMSWCTL2		0x1119e /* Cantiga only */
+#define   SWMEMCMD_RENDER_OFF	(0 << 13)
+#define   SWMEMCMD_RENDER_ON	(1 << 13)
+#define   SWMEMCMD_SWFREQ	(2 << 13)
+#define   SWMEMCMD_TARVID	(3 << 13)
+#define   SWMEMCMD_VRM_OFF	(4 << 13)
+#define   SWMEMCMD_VRM_ON	(5 << 13)
+#define   CMDSTS		(1<<12)
+#define   SFCAVM		(1<<11)
+#define   SWFREQ_MASK		0x0380 /* P0-7 */
+#define   SWFREQ_SHIFT		7
+#define   TARVID_MASK		0x001f
+#define MEMSTAT_CTG		0x111a0
+#define RCBMINAVG		0x111a0
+#define RCUPEI			0x111b0
+#define RCDNEI			0x111b4
+#define MCHBAR_RENDER_STANDBY		0x111b8
 #define   RCX_SW_EXIT		(1<<23)
 #define   RSX_STATUS_MASK	0x00700000
+#define VIDCTL			0x111c0
+#define VIDSTS			0x111c8
+#define VIDSTART		0x111cc /* 8 bits */
+#define MEMSTAT_ILK			0x111f8
+#define   MEMSTAT_VID_MASK	0x7f00
+#define   MEMSTAT_VID_SHIFT	8
+#define   MEMSTAT_PSTATE_MASK	0x00f8
+#define   MEMSTAT_PSTATE_SHIFT  3
+#define   MEMSTAT_MON_ACTV	(1<<2)
+#define   MEMSTAT_SRC_CTL_MASK	0x0003
+#define   MEMSTAT_SRC_CTL_CORE	0
+#define   MEMSTAT_SRC_CTL_TRB	1
+#define   MEMSTAT_SRC_CTL_THM	2
+#define   MEMSTAT_SRC_CTL_STDBY 3
+#define RCPREVBSYTUPAVG		0x113b8
+#define RCPREVBSYTDNAVG		0x113bc
 #define PEG_BAND_GAP_DATA	0x14d68
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index a3b90c9..ac0d1a7 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -682,6 +682,8 @@
 		I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS);
 		I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR);
 		I915_WRITE(PCH_PP_CONTROL, dev_priv->savePP_CONTROL);
+		I915_WRITE(MCHBAR_RENDER_STANDBY,
+			   dev_priv->saveMCHBAR_RENDER_STANDBY);
 	} else {
 		I915_WRITE(PFIT_PGM_RATIOS, dev_priv->savePFIT_PGM_RATIOS);
 		I915_WRITE(BLC_PWM_CTL, dev_priv->saveBLC_PWM_CTL);
@@ -745,11 +747,16 @@
 		dev_priv->saveGTIMR = I915_READ(GTIMR);
 		dev_priv->saveFDI_RXA_IMR = I915_READ(FDI_RXA_IMR);
 		dev_priv->saveFDI_RXB_IMR = I915_READ(FDI_RXB_IMR);
+		dev_priv->saveMCHBAR_RENDER_STANDBY =
+			I915_READ(MCHBAR_RENDER_STANDBY);
 	} else {
 		dev_priv->saveIER = I915_READ(IER);
 		dev_priv->saveIMR = I915_READ(IMR);
 	}
 
+	if (IS_IRONLAKE_M(dev))
+		ironlake_disable_drps(dev);
+
 	/* Cache mode state */
 	dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
 
@@ -820,6 +827,9 @@
 	/* Clock gating state */
 	intel_init_clock_gating(dev);
 
+	if (IS_IRONLAKE_M(dev))
+		ironlake_enable_drps(dev);
+
 	/* Cache mode state */
 	I915_WRITE (CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
 
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 15fbc1b..70c9d4b 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -247,6 +247,7 @@
 parse_general_features(struct drm_i915_private *dev_priv,
 		       struct bdb_header *bdb)
 {
+	struct drm_device *dev = dev_priv->dev;
 	struct bdb_general_features *general;
 
 	/* Set sensible defaults in case we can't find the general block */
@@ -263,7 +264,7 @@
 			if (IS_I85X(dev_priv->dev))
 				dev_priv->lvds_ssc_freq =
 					general->ssc_freq ? 66 : 48;
-			else if (IS_IRONLAKE(dev_priv->dev))
+			else if (IS_IRONLAKE(dev_priv->dev) || IS_GEN6(dev))
 				dev_priv->lvds_ssc_freq =
 					general->ssc_freq ? 100 : 120;
 			else
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 79dd402..fccf074 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -39,7 +39,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 temp, reg;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		reg = PCH_ADPA;
 	else
 		reg = ADPA;
@@ -113,7 +113,7 @@
 	else
 		dpll_md_reg = DPLL_B_MD;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		adpa_reg = PCH_ADPA;
 	else
 		adpa_reg = ADPA;
@@ -122,7 +122,7 @@
 	 * Disable separate mode multiplier used when cloning SDVO to CRT
 	 * XXX this needs to be adjusted when we really are cloning
 	 */
-	if (IS_I965G(dev) && !IS_IRONLAKE(dev)) {
+	if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev)) {
 		dpll_md = I915_READ(dpll_md_reg);
 		I915_WRITE(dpll_md_reg,
 			   dpll_md & ~DPLL_MD_UDI_MULTIPLIER_MASK);
@@ -136,11 +136,11 @@
 
 	if (intel_crtc->pipe == 0) {
 		adpa |= ADPA_PIPE_A_SELECT;
-		if (!IS_IRONLAKE(dev))
+		if (!HAS_PCH_SPLIT(dev))
 			I915_WRITE(BCLRPAT_A, 0);
 	} else {
 		adpa |= ADPA_PIPE_B_SELECT;
-		if (!IS_IRONLAKE(dev))
+		if (!HAS_PCH_SPLIT(dev))
 			I915_WRITE(BCLRPAT_B, 0);
 	}
 
@@ -202,7 +202,7 @@
 	u32 hotplug_en;
 	int i, tries = 0;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return intel_ironlake_crt_detect_hotplug(connector);
 
 	/*
@@ -524,7 +524,7 @@
 					  &intel_output->enc);
 
 	/* Set up the DDC bus. */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		i2c_reg = PCH_GPIOA;
 	else {
 		i2c_reg = GPIOA;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b27202d..9cd6de5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -232,7 +232,7 @@
 #define G4X_P2_DISPLAY_PORT_FAST           10
 #define G4X_P2_DISPLAY_PORT_LIMIT          0
 
-/* Ironlake */
+/* Ironlake / Sandybridge */
 /* as we calculate clock using (register_value + 2) for
    N/M1/M2, so here the range value for them is (actual_value-2).
  */
@@ -690,7 +690,7 @@
 	struct drm_device *dev = crtc->dev;
 	const intel_limit_t *limit;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		limit = intel_ironlake_limit(crtc);
 	else if (IS_G4X(dev)) {
 		limit = intel_g4x_limit(crtc);
@@ -886,7 +886,7 @@
 	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
 		int lvds_reg;
 
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			lvds_reg = PCH_LVDS;
 		else
 			lvds_reg = LVDS;
@@ -1188,25 +1188,30 @@
 	if (intel_fb->obj->size > dev_priv->cfb_size) {
 		DRM_DEBUG_KMS("framebuffer too large, disabling "
 				"compression\n");
+		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
 		goto out_disable;
 	}
 	if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
 		DRM_DEBUG_KMS("mode incompatible with compression, "
 				"disabling\n");
+		dev_priv->no_fbc_reason = FBC_UNSUPPORTED_MODE;
 		goto out_disable;
 	}
 	if ((mode->hdisplay > 2048) ||
 	    (mode->vdisplay > 1536)) {
 		DRM_DEBUG_KMS("mode too large for compression, disabling\n");
+		dev_priv->no_fbc_reason = FBC_MODE_TOO_LARGE;
 		goto out_disable;
 	}
 	if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
 		DRM_DEBUG_KMS("plane not 0, disabling compression\n");
+		dev_priv->no_fbc_reason = FBC_BAD_PLANE;
 		goto out_disable;
 	}
 	if (obj_priv->tiling_mode != I915_TILING_X) {
 		DRM_DEBUG_KMS("framebuffer not tiled, disabling compression\n");
+		dev_priv->no_fbc_reason = FBC_NOT_TILED;
 		goto out_disable;
 	}
 
@@ -1366,7 +1371,7 @@
 			dspcntr &= ~DISPPLANE_TILED;
 	}
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		/* must disable */
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
@@ -1427,7 +1432,7 @@
 	u8 sr1;
 	u32 vga_reg;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		vga_reg = CPU_VGACNTRL;
 	else
 		vga_reg = VGACNTRL;
@@ -2111,7 +2116,7 @@
 				  struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = crtc->dev;
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		/* FDI link clock is fixed at 2.7G */
 		if (mode->clock * 3 > 27000 * 4)
 			return MODE_CLOCK_HIGH;
@@ -2757,11 +2762,22 @@
 		srwm = total_size - sr_entries;
 		if (srwm < 0)
 			srwm = 1;
-		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN | (srwm & 0x3f));
+
+		if (IS_I945G(dev) || IS_I945GM(dev))
+			I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
+		else if (IS_I915GM(dev)) {
+			/* 915M has a smaller SRWM field */
+			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
+			I915_WRITE(INSTPM, I915_READ(INSTPM) | INSTPM_SELF_EN);
+		}
 	} else {
 		/* Turn off self refresh if both pipes are enabled */
-		I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
-					& ~FW_BLC_SELF_EN);
+		if (IS_I945G(dev) || IS_I945GM(dev)) {
+			I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
+				   & ~FW_BLC_SELF_EN);
+		} else if (IS_I915GM(dev)) {
+			I915_WRITE(INSTPM, I915_READ(INSTPM) & ~INSTPM_SELF_EN);
+		}
 	}
 
 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
@@ -2967,7 +2983,7 @@
 					refclk / 1000);
 	} else if (IS_I9XX(dev)) {
 		refclk = 96000;
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			refclk = 120000; /* 120Mhz refclk */
 	} else {
 		refclk = 48000;
@@ -3025,7 +3041,7 @@
 	}
 
 	/* FDI link */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		int lane, link_bw, bpp;
 		/* eDP doesn't require FDI link, so just set DP M/N
 		   according to current link config */
@@ -3102,7 +3118,7 @@
 	 * PCH B stepping, previous chipset stepping should be
 	 * ignoring this setting.
 	 */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		temp = I915_READ(PCH_DREF_CONTROL);
 		/* Always enable nonspread source */
 		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
@@ -3149,7 +3165,7 @@
 				reduced_clock.m2;
 	}
 
-	if (!IS_IRONLAKE(dev))
+	if (!HAS_PCH_SPLIT(dev))
 		dpll = DPLL_VGA_MODE_DIS;
 
 	if (IS_I9XX(dev)) {
@@ -3162,7 +3178,7 @@
 			sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
 			if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 				dpll |= (sdvo_pixel_multiply - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-			else if (IS_IRONLAKE(dev))
+			else if (HAS_PCH_SPLIT(dev))
 				dpll |= (sdvo_pixel_multiply - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 		}
 		if (is_dp)
@@ -3174,7 +3190,7 @@
 		else {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
 			/* also FPA1 */
-			if (IS_IRONLAKE(dev))
+			if (HAS_PCH_SPLIT(dev))
 				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 			if (IS_G4X(dev) && has_reduced_clock)
 				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
@@ -3193,7 +3209,7 @@
 			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
 			break;
 		}
-		if (IS_I965G(dev) && !IS_IRONLAKE(dev))
+		if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev))
 			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
 	} else {
 		if (is_lvds) {
@@ -3227,7 +3243,7 @@
 
 	/* Ironlake's plane is forced to pipe, bit 24 is to
 	   enable color space conversion */
-	if (!IS_IRONLAKE(dev)) {
+	if (!HAS_PCH_SPLIT(dev)) {
 		if (pipe == 0)
 			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
 		else
@@ -3254,14 +3270,14 @@
 
 
 	/* Disable the panel fitter if it was on our pipe */
-	if (!IS_IRONLAKE(dev) && intel_panel_fitter_pipe(dev) == pipe)
+	if (!HAS_PCH_SPLIT(dev) && intel_panel_fitter_pipe(dev) == pipe)
 		I915_WRITE(PFIT_CONTROL, 0);
 
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
 	/* assign to Ironlake registers */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		fp_reg = pch_fp_reg;
 		dpll_reg = pch_dpll_reg;
 	}
@@ -3282,7 +3298,7 @@
 	if (is_lvds) {
 		u32 lvds;
 
-		if (IS_IRONLAKE(dev))
+		if (HAS_PCH_SPLIT(dev))
 			lvds_reg = PCH_LVDS;
 
 		lvds = I915_READ(lvds_reg);
@@ -3304,12 +3320,12 @@
 		/* set the dithering flag */
 		if (IS_I965G(dev)) {
 			if (dev_priv->lvds_dither) {
-				if (IS_IRONLAKE(dev))
+				if (HAS_PCH_SPLIT(dev))
 					pipeconf |= PIPE_ENABLE_DITHER;
 				else
 					lvds |= LVDS_ENABLE_DITHER;
 			} else {
-				if (IS_IRONLAKE(dev))
+				if (HAS_PCH_SPLIT(dev))
 					pipeconf &= ~PIPE_ENABLE_DITHER;
 				else
 					lvds &= ~LVDS_ENABLE_DITHER;
@@ -3328,7 +3344,7 @@
 		/* Wait for the clocks to stabilize. */
 		udelay(150);
 
-		if (IS_I965G(dev) && !IS_IRONLAKE(dev)) {
+		if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev)) {
 			if (is_sdvo) {
 				sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
 				I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
@@ -3375,14 +3391,14 @@
 	/* pipesrc and dspsize control the size that is scaled from, which should
 	 * always be the user's requested size.
 	 */
-	if (!IS_IRONLAKE(dev)) {
+	if (!HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) |
 				(mode->hdisplay - 1));
 		I915_WRITE(dsppos_reg, 0);
 	}
 	I915_WRITE(pipesrc_reg, ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(data_m1_reg, TU_SIZE(m_n.tu) | m_n.gmch_m);
 		I915_WRITE(data_n1_reg, TU_SIZE(m_n.tu) | m_n.gmch_n);
 		I915_WRITE(link_m1_reg, m_n.link_m);
@@ -3438,7 +3454,7 @@
 		return;
 
 	/* use legacy palette for Ironlake */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		palreg = (intel_crtc->pipe == 0) ? LGC_PALETTE_A :
 						   LGC_PALETTE_B;
 
@@ -3553,11 +3569,10 @@
 	intel_crtc->cursor_bo = bo;
 
 	return 0;
-fail:
-	mutex_lock(&dev->struct_mutex);
 fail_locked:
-	drm_gem_object_unreference(bo);
 	mutex_unlock(&dev->struct_mutex);
+fail:
+	drm_gem_object_unreference_unlocked(bo);
 	return ret;
 }
 
@@ -3922,7 +3937,7 @@
 	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
 	int dpll = I915_READ(dpll_reg);
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return;
 
 	if (!dev_priv->lvds_downclock_avail)
@@ -3961,7 +3976,7 @@
 	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
 	int dpll = I915_READ(dpll_reg);
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return;
 
 	if (!dev_priv->lvds_downclock_avail)
@@ -4011,6 +4026,11 @@
 
 	mutex_lock(&dev->struct_mutex);
 
+	if (IS_I945G(dev) || IS_I945GM(dev)) {
+		DRM_DEBUG_DRIVER("enable memory self refresh on 945\n");
+		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN_MASK | FW_BLC_SELF_EN);
+	}
+
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		/* Skip inactive CRTCs */
 		if (!crtc->fb)
@@ -4044,9 +4064,17 @@
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return;
 
-	if (!dev_priv->busy)
+	if (!dev_priv->busy) {
+		if (IS_I945G(dev) || IS_I945GM(dev)) {
+			u32 fw_blc_self;
+
+			DRM_DEBUG_DRIVER("disable memory self refresh on 945\n");
+			fw_blc_self = I915_READ(FW_BLC_SELF);
+			fw_blc_self &= ~FW_BLC_SELF_EN;
+			I915_WRITE(FW_BLC_SELF, fw_blc_self | FW_BLC_SELF_EN_MASK);
+		}
 		dev_priv->busy = true;
-	else
+	} else
 		mod_timer(&dev_priv->idle_timer, jiffies +
 			  msecs_to_jiffies(GPU_IDLE_TIMEOUT));
 
@@ -4058,6 +4086,14 @@
 		intel_fb = to_intel_framebuffer(crtc->fb);
 		if (intel_fb->obj == obj) {
 			if (!intel_crtc->busy) {
+				if (IS_I945G(dev) || IS_I945GM(dev)) {
+					u32 fw_blc_self;
+
+					DRM_DEBUG_DRIVER("disable memory self refresh on 945\n");
+					fw_blc_self = I915_READ(FW_BLC_SELF);
+					fw_blc_self &= ~FW_BLC_SELF_EN;
+					I915_WRITE(FW_BLC_SELF, fw_blc_self | FW_BLC_SELF_EN_MASK);
+				}
 				/* Non-busy -> busy, upclock */
 				intel_increase_pllclock(crtc, true);
 				intel_crtc->busy = true;
@@ -4382,7 +4418,7 @@
 	if (IS_MOBILE(dev) && !IS_I830(dev))
 		intel_lvds_init(dev);
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		int found;
 
 		if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED))
@@ -4451,7 +4487,7 @@
 			DRM_DEBUG_KMS("probing DP_D\n");
 			intel_dp_init(dev, DP_D);
 		}
-	} else if (IS_I8XX(dev))
+	} else if (IS_GEN2(dev))
 		intel_dvo_init(dev);
 
 	if (SUPPORTS_TV(dev))
@@ -4476,9 +4512,7 @@
 		intelfb_remove(dev, fb);
 
 	drm_framebuffer_cleanup(fb);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(intel_fb->obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(intel_fb->obj);
 
 	kfree(intel_fb);
 }
@@ -4541,9 +4575,7 @@
 
 	ret = intel_framebuffer_create(dev, mode_cmd, &fb, obj);
 	if (ret) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(obj);
-		mutex_unlock(&dev->struct_mutex);
+		drm_gem_object_unreference_unlocked(obj);
 		return NULL;
 	}
 
@@ -4591,6 +4623,91 @@
 	return NULL;
 }
 
+void ironlake_enable_drps(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 rgvmodectl = I915_READ(MEMMODECTL), rgvswctl;
+	u8 fmax, fmin, fstart, vstart;
+	int i = 0;
+
+	/* 100ms RC evaluation intervals */
+	I915_WRITE(RCUPEI, 100000);
+	I915_WRITE(RCDNEI, 100000);
+
+	/* Set max/min thresholds to 90ms and 80ms respectively */
+	I915_WRITE(RCBMAXAVG, 90000);
+	I915_WRITE(RCBMINAVG, 80000);
+
+	I915_WRITE(MEMIHYST, 1);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+	vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
+		PXVFREQ_PX_SHIFT;
+
+	dev_priv->max_delay = fstart; /* can't go to fmax w/o IPS */
+	dev_priv->min_delay = fmin;
+	dev_priv->cur_delay = fstart;
+
+	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+	/*
+	 * Interrupts will be enabled in ironlake_irq_postinstall
+	 */
+
+	I915_WRITE(VIDSTART, vstart);
+	POSTING_READ(VIDSTART);
+
+	rgvmodectl |= MEMMODE_SWMODE_EN;
+	I915_WRITE(MEMMODECTL, rgvmodectl);
+
+	while (I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) {
+		if (i++ > 100) {
+			DRM_ERROR("stuck trying to change perf mode\n");
+			break;
+		}
+		msleep(1);
+	}
+	msleep(1);
+
+	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(fstart << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	POSTING_READ(MEMSWCTL);
+
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+}
+
+void ironlake_disable_drps(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 rgvswctl;
+	u8 fstart;
+
+	/* Ack interrupts, disable EFC interrupt */
+	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
+	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
+	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
+	I915_WRITE(DEIIR, DE_PCU_EVENT);
+	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+
+	/* Go back to the starting frequency */
+	fstart = (I915_READ(MEMMODECTL) & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(fstart << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	msleep(1);
+	rgvswctl |= MEMCTL_CMD_STS;
+	I915_WRITE(MEMSWCTL, rgvswctl);
+	msleep(1);
+
+}
+
 void intel_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4599,7 +4716,7 @@
 	 * Disable clock gating reported to work incorrectly according to the
 	 * specs, but enable as much else as we can.
 	 */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		return;
 	} else if (IS_G4X(dev)) {
 		uint32_t dspclk_gate;
@@ -4672,7 +4789,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* We always want a DPMS function */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		dev_priv->display.dpms = ironlake_crtc_dpms;
 	else
 		dev_priv->display.dpms = i9xx_crtc_dpms;
@@ -4715,7 +4832,7 @@
 			i830_get_display_clock_speed;
 
 	/* For FIFO watermark updates */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		dev_priv->display.update_wm = NULL;
 	else if (IS_G4X(dev))
 		dev_priv->display.update_wm = g4x_update_wm;
@@ -4774,11 +4891,6 @@
 	DRM_DEBUG_KMS("%d display pipe%s available.\n",
 		  num_pipe, num_pipe > 1 ? "s" : "");
 
-	if (IS_I85X(dev))
-		pci_read_config_word(dev->pdev, HPLLCC, &dev_priv->orig_clock);
-	else if (IS_I9XX(dev) || IS_G4X(dev))
-		pci_read_config_word(dev->pdev, GCFGC, &dev_priv->orig_clock);
-
 	for (i = 0; i < num_pipe; i++) {
 		intel_crtc_init(dev, i);
 	}
@@ -4787,6 +4899,9 @@
 
 	intel_init_clock_gating(dev);
 
+	if (IS_IRONLAKE_M(dev))
+		ironlake_enable_drps(dev);
+
 	INIT_WORK(&dev_priv->idle_work, intel_idle_update);
 	setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer,
 		    (unsigned long)dev);
@@ -4834,6 +4949,9 @@
 		drm_gem_object_unreference(dev_priv->pwrctx);
 	}
 
+	if (IS_IRONLAKE_M(dev))
+		ironlake_disable_drps(dev);
+
 	mutex_unlock(&dev->struct_mutex);
 
 	drm_mode_config_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 439506c..3ef3a0d 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -231,7 +231,7 @@
 	 */
 	if (IS_eDP(intel_output))
 		aux_clock_divider = 225; /* eDP input clock at 450Mhz */
-	else if (IS_IRONLAKE(dev))
+	else if (HAS_PCH_SPLIT(dev))
 		aux_clock_divider = 62; /* IRL input clock fixed at 125Mhz */
 	else
 		aux_clock_divider = intel_hrawclk(dev) / 2;
@@ -584,7 +584,7 @@
 	intel_dp_compute_m_n(3, lane_count,
 			     mode->clock, adjusted_mode->clock, &m_n);
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		if (intel_crtc->pipe == 0) {
 			I915_WRITE(TRANSA_DATA_M1,
 				   ((m_n.tu - 1) << PIPE_GMCH_DATA_M_TU_SIZE_SHIFT) |
@@ -1176,7 +1176,7 @@
 
 	dp_priv->has_audio = false;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return ironlake_dp_detect(connector);
 
 	temp = I915_READ(PORT_HOTPLUG_EN);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a51573d..3a467ca 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -209,6 +209,8 @@
 extern void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 				    u16 *blue, int regno);
 extern void intel_init_clock_gating(struct drm_device *dev);
+extern void ironlake_enable_drps(struct drm_device *dev);
+extern void ironlake_disable_drps(struct drm_device *dev);
 
 extern int intel_framebuffer_create(struct drm_device *dev,
 				    struct drm_mode_fb_cmd *mode_cmd,
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index aaabbcb..8cd791d 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/fb.h>
 #include <linux/init.h>
+#include <linux/vga_switcheroo.h>
 
 #include "drmP.h"
 #include "drm.h"
@@ -235,6 +236,7 @@
 			obj_priv->gtt_offset, fbo);
 
 	mutex_unlock(&dev->struct_mutex);
+	vga_switcheroo_client_fb_set(dev->pdev, info);
 	return 0;
 
 out_unpin:
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 0e268de..a30f8bf 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -82,7 +82,7 @@
 	/* HW workaround, need to toggle enable bit off and on for 12bpc, but
 	 * we do this anyway which shows more stable in testing.
 	 */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(hdmi_priv->sdvox_reg, temp & ~SDVO_ENABLE);
 		POSTING_READ(hdmi_priv->sdvox_reg);
 	}
@@ -99,7 +99,7 @@
 	/* HW workaround, need to write this twice for issue that may result
 	 * in first write getting masked.
 	 */
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(hdmi_priv->sdvox_reg, temp);
 		POSTING_READ(hdmi_priv->sdvox_reg);
 	}
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 8673c73..fcc753c 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -128,7 +128,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(PCH_GMBUS0, 0);
 	} else {
 		I915_WRITE(GMBUS0, 0);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index c2e8a45..14e516f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -56,7 +56,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 blc_pwm_ctl, reg;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		reg = BLC_PWM_CPU_CTL;
 	else
 		reg = BLC_PWM_CTL;
@@ -74,7 +74,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		reg = BLC_PWM_PCH_CTL2;
 	else
 		reg = BLC_PWM_CTL;
@@ -89,17 +89,22 @@
 static void intel_lvds_set_power(struct drm_device *dev, bool on)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 pp_status, ctl_reg, status_reg;
+	u32 pp_status, ctl_reg, status_reg, lvds_reg;
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		ctl_reg = PCH_PP_CONTROL;
 		status_reg = PCH_PP_STATUS;
+		lvds_reg = PCH_LVDS;
 	} else {
 		ctl_reg = PP_CONTROL;
 		status_reg = PP_STATUS;
+		lvds_reg = LVDS;
 	}
 
 	if (on) {
+		I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN);
+		POSTING_READ(lvds_reg);
+
 		I915_WRITE(ctl_reg, I915_READ(ctl_reg) |
 			   POWER_TARGET_ON);
 		do {
@@ -115,6 +120,9 @@
 		do {
 			pp_status = I915_READ(status_reg);
 		} while (pp_status & PP_ON);
+
+		I915_WRITE(lvds_reg, I915_READ(lvds_reg) & ~LVDS_PORT_EN);
+		POSTING_READ(lvds_reg);
 	}
 }
 
@@ -137,7 +145,7 @@
 	u32 pp_on_reg, pp_off_reg, pp_ctl_reg, pp_div_reg;
 	u32 pwm_ctl_reg;
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		pp_on_reg = PCH_PP_ON_DELAYS;
 		pp_off_reg = PCH_PP_OFF_DELAYS;
 		pp_ctl_reg = PCH_PP_CONTROL;
@@ -174,7 +182,7 @@
 	u32 pp_on_reg, pp_off_reg, pp_ctl_reg, pp_div_reg;
 	u32 pwm_ctl_reg;
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		pp_on_reg = PCH_PP_ON_DELAYS;
 		pp_off_reg = PCH_PP_OFF_DELAYS;
 		pp_ctl_reg = PCH_PP_CONTROL;
@@ -297,7 +305,7 @@
 	}
 
 	/* full screen scale for now */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		goto out;
 
 	/* 965+ wants fuzzy fitting */
@@ -327,7 +335,7 @@
 	 * to register description and PRM.
 	 * Change the value here to see the borders for debugging
 	 */
-	if (!IS_IRONLAKE(dev)) {
+	if (!HAS_PCH_SPLIT(dev)) {
 		I915_WRITE(BCLRPAT_A, 0);
 		I915_WRITE(BCLRPAT_B, 0);
 	}
@@ -548,7 +556,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg;
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		reg = BLC_PWM_CPU_CTL;
 	else
 		reg = BLC_PWM_CTL;
@@ -587,7 +595,7 @@
 	 * settings.
 	 */
 
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		return;
 
 	/*
@@ -655,8 +663,15 @@
  */
 static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
 {
+	struct drm_device *dev = connector->dev;
 	enum drm_connector_status status = connector_status_connected;
 
+	/* ACPI lid methods were generally unreliable in this generation, so
+	 * don't even bother.
+	 */
+	if (IS_GEN2(dev))
+		return connector_status_connected;
+
 	if (!dmi_check_system(bad_lid_status) && !acpi_lid_open())
 		status = connector_status_disconnected;
 
@@ -1020,7 +1035,7 @@
 		return;
 	}
 
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		if ((I915_READ(PCH_LVDS) & LVDS_DETECTED) == 0)
 			return;
 		if (dev_priv->edp_support) {
@@ -1123,7 +1138,7 @@
 	 */
 
 	/* Ironlake: FIXME if still fail, not try pipe mode now */
-	if (IS_IRONLAKE(dev))
+	if (HAS_PCH_SPLIT(dev))
 		goto failed;
 
 	lvds = I915_READ(LVDS);
@@ -1144,7 +1159,7 @@
 		goto failed;
 
 out:
-	if (IS_IRONLAKE(dev)) {
+	if (HAS_PCH_SPLIT(dev)) {
 		u32 pwm;
 		/* make sure PWM is enabled */
 		pwm = I915_READ(BLC_PWM_CPU_CTL2);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 2639591..d355d1d 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -172,7 +172,7 @@
 #define OFC_UPDATE		0x1
 
 #define OVERLAY_NONPHYSICAL(dev) (IS_G33(dev) || IS_I965G(dev))
-#define OVERLAY_EXISTS(dev) (!IS_G4X(dev) && !IS_IRONLAKE(dev))
+#define OVERLAY_EXISTS(dev) (!IS_G4X(dev) && !IS_IRONLAKE(dev) && !IS_GEN6(dev))
 
 
 static struct overlay_registers *intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
@@ -199,16 +199,11 @@
 
 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-        drm_i915_private_t *dev_priv = dev->dev_private;
-
 	if (OVERLAY_NONPHYSICAL(overlay->dev))
 		io_mapping_unmap_atomic(overlay->virt_addr);
 
 	overlay->virt_addr = NULL;
 
-	I915_READ(OVADD); /* flush wc cashes */
-
 	return;
 }
 
@@ -225,9 +220,7 @@
 	overlay->active = 1;
 	overlay->hw_wedged = NEEDS_WAIT_FOR_FLIP;
 
-	BEGIN_LP_RING(6);
-	OUT_RING(MI_FLUSH);
-	OUT_RING(MI_NOOP);
+	BEGIN_LP_RING(4);
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_ON);
 	OUT_RING(overlay->flip_addr | OFC_UPDATE);
 	OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -267,9 +260,7 @@
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	BEGIN_LP_RING(4);
-	OUT_RING(MI_FLUSH);
-	OUT_RING(MI_NOOP);
+	BEGIN_LP_RING(2);
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	OUT_RING(flip_addr);
         ADVANCE_LP_RING();
@@ -338,9 +329,7 @@
 	/* wait for overlay to go idle */
 	overlay->hw_wedged = SWITCH_OFF_STAGE_1;
 
-	BEGIN_LP_RING(6);
-	OUT_RING(MI_FLUSH);
-	OUT_RING(MI_NOOP);
+	BEGIN_LP_RING(4);
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	OUT_RING(flip_addr);
         OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -358,9 +347,7 @@
 	/* turn overlay off */
 	overlay->hw_wedged = SWITCH_OFF_STAGE_2;
 
-	BEGIN_LP_RING(6);
-        OUT_RING(MI_FLUSH);
-        OUT_RING(MI_NOOP);
+	BEGIN_LP_RING(4);
         OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
 	OUT_RING(flip_addr);
         OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -435,9 +422,7 @@
 
 			overlay->hw_wedged = SWITCH_OFF_STAGE_2;
 
-			BEGIN_LP_RING(6);
-			OUT_RING(MI_FLUSH);
-			OUT_RING(MI_NOOP);
+			BEGIN_LP_RING(4);
 			OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
 			OUT_RING(flip_addr);
 			OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
@@ -1179,7 +1164,7 @@
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	mutex_unlock(&dev->mode_config.mutex);
-	drm_gem_object_unreference(new_bo);
+	drm_gem_object_unreference_unlocked(new_bo);
 	kfree(params);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 82678d3..48daee5 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -35,6 +35,7 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 #include "intel_sdvo_regs.h"
+#include <linux/dmi.h>
 
 static char *tv_format_names[] = {
 	"NTSC_M"   , "NTSC_J"  , "NTSC_443",
@@ -2283,6 +2284,25 @@
 		return 0x72;
 }
 
+static int intel_sdvo_bad_tv_callback(const struct dmi_system_id *id)
+{
+	DRM_DEBUG_KMS("Ignoring bad SDVO TV connector for %s\n", id->ident);
+	return 1;
+}
+
+static struct dmi_system_id intel_sdvo_bad_tv[] = {
+	{
+		.callback = intel_sdvo_bad_tv_callback,
+		.ident = "IntelG45/ICH10R/DME1737",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "IBM CORPORATION"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "4800784"),
+		},
+	},
+
+	{ }	/* terminating entry */
+};
+
 static bool
 intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
 {
@@ -2323,7 +2343,8 @@
 					(1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
 					(1 << INTEL_ANALOG_CLONE_BIT);
 		}
-	} else if (flags & SDVO_OUTPUT_SVID0) {
+	} else if ((flags & SDVO_OUTPUT_SVID0) &&
+		   !dmi_check_system(intel_sdvo_bad_tv)) {
 
 		sdvo_priv->controlled_output = SDVO_OUTPUT_SVID0;
 		encoder->encoder_type = DRM_MODE_ENCODER_TVDAC;
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 48c290b..32db806 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -16,7 +16,7 @@
              nv04_fifo.o nv10_fifo.o nv40_fifo.o nv50_fifo.o \
              nv04_graph.o nv10_graph.o nv20_graph.o \
              nv40_graph.o nv50_graph.o \
-             nv40_grctx.o \
+             nv40_grctx.o nv50_grctx.o \
              nv04_instmem.o nv50_instmem.o \
              nv50_crtc.o nv50_dac.o nv50_sor.o \
              nv50_cursor.o nv50_display.o nv50_fbcon.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 48227e7..0e0730a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -11,6 +11,8 @@
 #include "nouveau_drm.h"
 #include "nv50_display.h"
 
+#include <linux/vga_switcheroo.h>
+
 #define NOUVEAU_DSM_SUPPORTED 0x00
 #define NOUVEAU_DSM_SUPPORTED_FUNCTIONS 0x00
 
@@ -28,31 +30,30 @@
 #define NOUVEAU_DSM_POWER_SPEED 0x01
 #define NOUVEAU_DSM_POWER_STAMINA 0x02
 
-static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result)
-{
-	static char muid[] = {
-		0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
-		0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
-	};
+static struct nouveau_dsm_priv {
+	bool dsm_detected;
+	acpi_handle dhandle;
+	acpi_handle dsm_handle;
+} nouveau_dsm_priv;
 
-	struct pci_dev *pdev = dev->pdev;
-	struct acpi_handle *handle;
+static const char nouveau_dsm_muid[] = {
+	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
+	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
+};
+
+static int nouveau_dsm(acpi_handle handle, int func, int arg, int *result)
+{
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_object_list input;
 	union acpi_object params[4];
 	union acpi_object *obj;
 	int err;
 
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
-
-	if (!handle)
-		return -ENODEV;
-
 	input.count = 4;
 	input.pointer = params;
 	params[0].type = ACPI_TYPE_BUFFER;
-	params[0].buffer.length = sizeof(muid);
-	params[0].buffer.pointer = (char *)muid;
+	params[0].buffer.length = sizeof(nouveau_dsm_muid);
+	params[0].buffer.pointer = (char *)nouveau_dsm_muid;
 	params[1].type = ACPI_TYPE_INTEGER;
 	params[1].integer.value = 0x00000102;
 	params[2].type = ACPI_TYPE_INTEGER;
@@ -62,7 +63,7 @@
 
 	err = acpi_evaluate_object(handle, "_DSM", &input, &output);
 	if (err) {
-		NV_INFO(dev, "failed to evaluate _DSM: %d\n", err);
+		printk(KERN_INFO "failed to evaluate _DSM: %d\n", err);
 		return err;
 	}
 
@@ -86,40 +87,119 @@
 	return 0;
 }
 
-int nouveau_hybrid_setup(struct drm_device *dev)
+static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id)
 {
-	int result;
+	return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL);
+}
 
-	if (nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STATE,
-								&result))
-		return -ENODEV;
-
-	NV_INFO(dev, "_DSM hardware status gave 0x%x\n", result);
-
-	if (result) { /* Ensure that the external GPU is enabled */
-		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL);
-		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED,
-									NULL);
-	} else { /* Stamina mode - disable the external GPU */
-		nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_STAMINA,
-									NULL);
-		nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STAMINA,
-									NULL);
-	}
-
+static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switcheroo_state state)
+{
+	int arg;
+	if (state == VGA_SWITCHEROO_ON)
+		arg = NOUVEAU_DSM_POWER_SPEED;
+	else
+		arg = NOUVEAU_DSM_POWER_STAMINA;
+	nouveau_dsm(handle, NOUVEAU_DSM_POWER, arg, NULL);
 	return 0;
 }
 
-bool nouveau_dsm_probe(struct drm_device *dev)
+static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id)
 {
-	int support = 0;
+	if (id == VGA_SWITCHEROO_IGD)
+		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, NOUVEAU_DSM_LED_STAMINA);
+	else
+		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, NOUVEAU_DSM_LED_SPEED);
+}
 
-	if (nouveau_dsm(dev, NOUVEAU_DSM_SUPPORTED,
-				NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &support))
+static int nouveau_dsm_power_state(enum vga_switcheroo_client_id id,
+				   enum vga_switcheroo_state state)
+{
+	if (id == VGA_SWITCHEROO_IGD)
+		return 0;
+
+	return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dsm_handle, state);
+}
+
+static int nouveau_dsm_init(void)
+{
+	return 0;
+}
+
+static int nouveau_dsm_get_client_id(struct pci_dev *pdev)
+{
+	if (nouveau_dsm_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return VGA_SWITCHEROO_IGD;
+	else
+		return VGA_SWITCHEROO_DIS;
+}
+
+static struct vga_switcheroo_handler nouveau_dsm_handler = {
+	.switchto = nouveau_dsm_switchto,
+	.power_state = nouveau_dsm_power_state,
+	.init = nouveau_dsm_init,
+	.get_client_id = nouveau_dsm_get_client_id,
+};
+
+static bool nouveau_dsm_pci_probe(struct pci_dev *pdev)
+{
+	acpi_handle dhandle, nvidia_handle;
+	acpi_status status;
+	int ret;
+	uint32_t result;
+
+	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	if (!dhandle)
+		return false;
+	status = acpi_get_handle(dhandle, "_DSM", &nvidia_handle);
+	if (ACPI_FAILURE(status)) {
+		return false;
+	}
+
+	ret= nouveau_dsm(nvidia_handle, NOUVEAU_DSM_SUPPORTED,
+			 NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result);
+	if (ret < 0)
 		return false;
 
-	if (!support)
-		return false;
-
+	nouveau_dsm_priv.dhandle = dhandle;
+	nouveau_dsm_priv.dsm_handle = nvidia_handle;
 	return true;
 }
+
+static bool nouveau_dsm_detect(void)
+{
+	char acpi_method_name[255] = { 0 };
+	struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+	struct pci_dev *pdev = NULL;
+	int has_dsm = 0;
+	int vga_count = 0;
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		vga_count++;
+
+		has_dsm |= (nouveau_dsm_pci_probe(pdev) == true);
+	}
+
+	if (vga_count == 2 && has_dsm) {
+		acpi_get_name(nouveau_dsm_priv.dsm_handle, ACPI_FULL_PATHNAME, &buffer);
+		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
+		       acpi_method_name);
+		nouveau_dsm_priv.dsm_detected = true;
+		return true;
+	}
+	return false;
+}
+
+void nouveau_register_dsm_handler(void)
+{
+	bool r;
+
+	r = nouveau_dsm_detect();
+	if (!r)
+		return;
+
+	vga_switcheroo_register_handler(&nouveau_dsm_handler);
+}
+
+void nouveau_unregister_dsm_handler(void)
+{
+	vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 0e9cd1d..71247da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -311,11 +311,11 @@
 
 	/* C51 has misaligned regs on purpose. Marvellous */
 	if (reg & 0x2 ||
-	    (reg & 0x1 && dev_priv->VBIOS.pub.chip_version != 0x51))
+	    (reg & 0x1 && dev_priv->vbios.chip_version != 0x51))
 		NV_ERROR(dev, "======= misaligned reg 0x%08X =======\n", reg);
 
 	/* warn on C51 regs that haven't been verified accessible in tracing */
-	if (reg & 0x1 && dev_priv->VBIOS.pub.chip_version == 0x51 &&
+	if (reg & 0x1 && dev_priv->vbios.chip_version == 0x51 &&
 	    reg != 0x130d && reg != 0x1311 && reg != 0x60081d)
 		NV_WARN(dev, "=== C51 misaligned reg 0x%08X not verified ===\n",
 			reg);
@@ -420,7 +420,7 @@
 	LOG_OLD_VALUE(bios_rd32(bios, reg));
 	BIOSLOG(bios, "	Write: Reg: 0x%08X, Data: 0x%08X\n", reg, data);
 
-	if (dev_priv->VBIOS.execute) {
+	if (dev_priv->vbios.execute) {
 		still_alive();
 		nv_wr32(bios->dev, reg, data);
 	}
@@ -647,7 +647,7 @@
 	reg0 = (reg0 & 0xfff8ffff) | (pll.log2P << 16);
 	reg1 = (reg1 & 0xffff0000) | (pll.N1 << 8) | pll.M1;
 
-	if (dev_priv->VBIOS.execute) {
+	if (dev_priv->vbios.execute) {
 		still_alive();
 		nv_wr32(dev, reg + 4, reg1);
 		nv_wr32(dev, reg + 0, reg0);
@@ -689,7 +689,7 @@
 static int dcb_entry_idx_from_crtchead(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
 	/*
 	 * For the results of this function to be correct, CR44 must have been
@@ -700,7 +700,7 @@
 
 	uint8_t dcb_entry = NVReadVgaCrtc5758(dev, bios->state.crtchead, 0);
 
-	if (dcb_entry > bios->bdcb.dcb.entries) {
+	if (dcb_entry > bios->dcb.entries) {
 		NV_ERROR(dev, "CR58 doesn't have a valid DCB entry currently "
 				"(%02X)\n", dcb_entry);
 		dcb_entry = 0x7f;	/* unused / invalid marker */
@@ -713,25 +713,26 @@
 init_i2c_device_find(struct drm_device *dev, int i2c_index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct bios_parsed_dcb *bdcb = &dev_priv->VBIOS.bdcb;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 
 	if (i2c_index == 0xff) {
 		/* note: dcb_entry_idx_from_crtchead needs pre-script set-up */
 		int idx = dcb_entry_idx_from_crtchead(dev), shift = 0;
-		int default_indices = bdcb->i2c_default_indices;
+		int default_indices = dcb->i2c_default_indices;
 
-		if (idx != 0x7f && bdcb->dcb.entry[idx].i2c_upper_default)
+		if (idx != 0x7f && dcb->entry[idx].i2c_upper_default)
 			shift = 4;
 
 		i2c_index = (default_indices >> shift) & 0xf;
 	}
 	if (i2c_index == 0x80)	/* g80+ */
-		i2c_index = bdcb->i2c_default_indices & 0xf;
+		i2c_index = dcb->i2c_default_indices & 0xf;
 
 	return nouveau_i2c_find(dev, i2c_index);
 }
 
-static uint32_t get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
+static uint32_t
+get_tmds_index_reg(struct drm_device *dev, uint8_t mlv)
 {
 	/*
 	 * For mlv < 0x80, it is an index into a table of TMDS base addresses.
@@ -744,6 +745,7 @@
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
 	const int pramdac_offset[13] = {
 		0, 0, 0x8, 0, 0x2000, 0, 0, 0, 0x2008, 0, 0, 0, 0x2000 };
 	const uint32_t pramdac_table[4] = {
@@ -756,13 +758,12 @@
 		dcb_entry = dcb_entry_idx_from_crtchead(dev);
 		if (dcb_entry == 0x7f)
 			return 0;
-		dacoffset = pramdac_offset[
-				dev_priv->VBIOS.bdcb.dcb.entry[dcb_entry].or];
+		dacoffset = pramdac_offset[bios->dcb.entry[dcb_entry].or];
 		if (mlv == 0x81)
 			dacoffset ^= 8;
 		return 0x6808b0 + dacoffset;
 	} else {
-		if (mlv > ARRAY_SIZE(pramdac_table)) {
+		if (mlv >= ARRAY_SIZE(pramdac_table)) {
 			NV_ERROR(dev, "Magic Lookup Value too big (%02X)\n",
 									mlv);
 			return 0;
@@ -2574,19 +2575,19 @@
 
 	const uint32_t nv50_gpio_reg[4] = { 0xe104, 0xe108, 0xe280, 0xe284 };
 	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
-	const uint8_t *gpio_table = &bios->data[bios->bdcb.gpio_table_ptr];
+	const uint8_t *gpio_table = &bios->data[bios->dcb.gpio_table_ptr];
 	const uint8_t *gpio_entry;
 	int i;
 
 	if (!iexec->execute)
 		return 1;
 
-	if (bios->bdcb.version != 0x40) {
+	if (bios->dcb.version != 0x40) {
 		NV_ERROR(bios->dev, "DCB table not version 4.0\n");
 		return 0;
 	}
 
-	if (!bios->bdcb.gpio_table_ptr) {
+	if (!bios->dcb.gpio_table_ptr) {
 		NV_WARN(bios->dev, "Invalid pointer to INIT_8E table\n");
 		return 0;
 	}
@@ -3123,7 +3124,7 @@
 		      struct dcb_entry *dcbent, int head, bool dl)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct init_exec iexec = {true, false};
 
 	NV_TRACE(dev, "0x%04X: Parsing digital output script table\n",
@@ -3140,7 +3141,7 @@
 static int call_lvds_manufacturer_script(struct drm_device *dev, struct dcb_entry *dcbent, int head, enum LVDS_script script)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t sub = bios->data[bios->fp.xlated_entry + script] + (bios->fp.link_c_increment && dcbent->or & OUTPUT_C ? 1 : 0);
 	uint16_t scriptofs = ROM16(bios->data[bios->init_script_tbls_ptr + sub * 2]);
 
@@ -3194,7 +3195,7 @@
 	 * of a list of pxclks and script pointers.
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	unsigned int outputset = (dcbent->or == 4) ? 1 : 0;
 	uint16_t scriptptr = 0, clktable;
 	uint8_t clktableptr = 0;
@@ -3261,7 +3262,7 @@
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t lvds_ver = bios->data[bios->fp.lvdsmanufacturerpointer];
 	uint32_t sel_clk_binding, sel_clk;
 	int ret;
@@ -3395,7 +3396,7 @@
 #ifndef __powerpc__
 		NV_ERROR(dev, "Pointer to flat panel table invalid\n");
 #endif
-		bios->pub.digital_min_front_porch = 0x4b;
+		bios->digital_min_front_porch = 0x4b;
 		return 0;
 	}
 
@@ -3428,7 +3429,7 @@
 		 * fptable[4] is the minimum
 		 * RAMDAC_FP_HCRTC -> RAMDAC_FP_HSYNC_START gap
 		 */
-		bios->pub.digital_min_front_porch = fptable[4];
+		bios->digital_min_front_porch = fptable[4];
 		ofs = -7;
 		break;
 	default:
@@ -3467,7 +3468,7 @@
 
 	/* nv4x cards need both a strap value and fpindex of 0xf to use DDC */
 	if (lth.lvds_ver > 0x10)
-		bios->pub.fp_no_ddc = fpstrapping != 0xf || fpindex != 0xf;
+		bios->fp_no_ddc = fpstrapping != 0xf || fpindex != 0xf;
 
 	/*
 	 * If either the strap or xlated fpindex value are 0xf there is no
@@ -3491,7 +3492,7 @@
 bool nouveau_bios_fp_mode(struct drm_device *dev, struct drm_display_mode *mode)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *mode_entry = &bios->data[bios->fp.mode_ptr];
 
 	if (!mode)	/* just checking whether we can produce a mode */
@@ -3562,11 +3563,11 @@
 	 * until later, when this function should be called with non-zero pxclk
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int fpstrapping = get_fp_strap(dev, bios), lvdsmanufacturerindex = 0;
 	struct lvdstableheader lth;
 	uint16_t lvdsofs;
-	int ret, chip_version = bios->pub.chip_version;
+	int ret, chip_version = bios->chip_version;
 
 	ret = parse_lvds_manufacturer_table_header(dev, bios, &lth);
 	if (ret)
@@ -3682,7 +3683,7 @@
 			 uint16_t record, int record_len, int record_nr)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t entry;
 	uint16_t table;
 	int i, v;
@@ -3716,7 +3717,7 @@
 		      int *length)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *table;
 
 	if (!bios->display.dp_table_ptr) {
@@ -3725,7 +3726,7 @@
 	}
 	table = &bios->data[bios->display.dp_table_ptr];
 
-	if (table[0] != 0x21) {
+	if (table[0] != 0x20 && table[0] != 0x21) {
 		NV_ERROR(dev, "DisplayPort table version 0x%02x unknown\n",
 			 table[0]);
 		return NULL;
@@ -3765,7 +3766,7 @@
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint8_t *table = &bios->data[bios->display.script_table_ptr];
 	uint8_t *otable = NULL;
 	uint16_t script;
@@ -3918,8 +3919,8 @@
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
-	int cv = bios->pub.chip_version;
+	struct nvbios *bios = &dev_priv->vbios;
+	int cv = bios->chip_version;
 	uint16_t clktable = 0, scriptptr;
 	uint32_t sel_clk_binding, sel_clk;
 
@@ -3978,8 +3979,8 @@
 	 */
 
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
-	int cv = bios->pub.chip_version, pllindex = 0;
+	struct nvbios *bios = &dev_priv->vbios;
+	int cv = bios->chip_version, pllindex = 0;
 	uint8_t pll_lim_ver = 0, headerlen = 0, recordlen = 0, entries = 0;
 	uint32_t crystal_strap_mask, crystal_straps;
 
@@ -4332,7 +4333,7 @@
 	 */
 
 	bios->major_version = bios->data[offset + 3];
-	bios->pub.chip_version = bios->data[offset + 2];
+	bios->chip_version = bios->data[offset + 2];
 	NV_TRACE(dev, "Bios version %02x.%02x.%02x.%02x\n",
 		 bios->data[offset + 3], bios->data[offset + 2],
 		 bios->data[offset + 1], bios->data[offset]);
@@ -4402,7 +4403,7 @@
 	}
 
 	/* First entry is normal dac, 2nd tv-out perhaps? */
-	bios->pub.dactestval = ROM32(bios->data[load_table_ptr + headerlen]) & 0x3ff;
+	bios->dactestval = ROM32(bios->data[load_table_ptr + headerlen]) & 0x3ff;
 
 	return 0;
 }
@@ -4526,8 +4527,8 @@
 		return -ENOSYS;
 	}
 
-	bios->pub.dactestval = ROM32(bios->data[daccmpoffset + dacheaderlen]);
-	bios->pub.tvdactestval = ROM32(bios->data[daccmpoffset + dacheaderlen + 4]);
+	bios->dactestval = ROM32(bios->data[daccmpoffset + dacheaderlen]);
+	bios->tvdactestval = ROM32(bios->data[daccmpoffset + dacheaderlen + 4]);
 
 	return 0;
 }
@@ -4796,11 +4797,11 @@
 	uint16_t legacy_scripts_offset, legacy_i2c_offset;
 
 	/* load needed defaults in case we can't parse this info */
-	bios->bdcb.dcb.i2c[0].write = NV_CIO_CRE_DDC_WR__INDEX;
-	bios->bdcb.dcb.i2c[0].read = NV_CIO_CRE_DDC_STATUS__INDEX;
-	bios->bdcb.dcb.i2c[1].write = NV_CIO_CRE_DDC0_WR__INDEX;
-	bios->bdcb.dcb.i2c[1].read = NV_CIO_CRE_DDC0_STATUS__INDEX;
-	bios->pub.digital_min_front_porch = 0x4b;
+	bios->dcb.i2c[0].write = NV_CIO_CRE_DDC_WR__INDEX;
+	bios->dcb.i2c[0].read = NV_CIO_CRE_DDC_STATUS__INDEX;
+	bios->dcb.i2c[1].write = NV_CIO_CRE_DDC0_WR__INDEX;
+	bios->dcb.i2c[1].read = NV_CIO_CRE_DDC0_STATUS__INDEX;
+	bios->digital_min_front_porch = 0x4b;
 	bios->fmaxvco = 256000;
 	bios->fminvco = 128000;
 	bios->fp.duallink_transition_clk = 90000;
@@ -4907,10 +4908,10 @@
 	bios->legacy.i2c_indices.crt = bios->data[legacy_i2c_offset];
 	bios->legacy.i2c_indices.tv = bios->data[legacy_i2c_offset + 1];
 	bios->legacy.i2c_indices.panel = bios->data[legacy_i2c_offset + 2];
-	bios->bdcb.dcb.i2c[0].write = bios->data[legacy_i2c_offset + 4];
-	bios->bdcb.dcb.i2c[0].read = bios->data[legacy_i2c_offset + 5];
-	bios->bdcb.dcb.i2c[1].write = bios->data[legacy_i2c_offset + 6];
-	bios->bdcb.dcb.i2c[1].read = bios->data[legacy_i2c_offset + 7];
+	bios->dcb.i2c[0].write = bios->data[legacy_i2c_offset + 4];
+	bios->dcb.i2c[0].read = bios->data[legacy_i2c_offset + 5];
+	bios->dcb.i2c[1].write = bios->data[legacy_i2c_offset + 6];
+	bios->dcb.i2c[1].read = bios->data[legacy_i2c_offset + 7];
 
 	if (bmplength > 74) {
 		bios->fmaxvco = ROM32(bmp[67]);
@@ -4984,7 +4985,8 @@
 		else
 			NV_WARN(dev,
 				"DCB I2C table has more entries than indexable "
-				"(%d entries, max index 15)\n", i2ctable[2]);
+				"(%d entries, max %d)\n", i2ctable[2],
+				DCB_MAX_NUM_I2C_ENTRIES);
 		entry_len = i2ctable[3];
 		/* [4] is i2c_default_indices, read in parse_dcb_table() */
 	}
@@ -5000,8 +5002,8 @@
 
 	if (index == 0xf)
 		return 0;
-	if (index > i2c_entries) {
-		NV_ERROR(dev, "DCB I2C index too big (%d > %d)\n",
+	if (index >= i2c_entries) {
+		NV_ERROR(dev, "DCB I2C index too big (%d >= %d)\n",
 			 index, i2ctable[2]);
 		return -ENOENT;
 	}
@@ -5036,7 +5038,7 @@
 static struct dcb_gpio_entry *
 new_gpio_entry(struct nvbios *bios)
 {
-	struct parsed_dcb_gpio *gpio = &bios->bdcb.gpio;
+	struct dcb_gpio_table *gpio = &bios->dcb.gpio;
 
 	return &gpio->entry[gpio->entries++];
 }
@@ -5045,14 +5047,14 @@
 nouveau_bios_gpio_entry(struct drm_device *dev, enum dcb_gpio_tag tag)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int i;
 
-	for (i = 0; i < bios->bdcb.gpio.entries; i++) {
-		if (bios->bdcb.gpio.entry[i].tag != tag)
+	for (i = 0; i < bios->dcb.gpio.entries; i++) {
+		if (bios->dcb.gpio.entry[i].tag != tag)
 			continue;
 
-		return &bios->bdcb.gpio.entry[i];
+		return &bios->dcb.gpio.entry[i];
 	}
 
 	return NULL;
@@ -5100,7 +5102,7 @@
 parse_dcb_gpio_table(struct nvbios *bios)
 {
 	struct drm_device *dev = bios->dev;
-	uint16_t gpio_table_ptr = bios->bdcb.gpio_table_ptr;
+	uint16_t gpio_table_ptr = bios->dcb.gpio_table_ptr;
 	uint8_t *gpio_table = &bios->data[gpio_table_ptr];
 	int header_len = gpio_table[1],
 	    entries = gpio_table[2],
@@ -5108,7 +5110,7 @@
 	void (*parse_entry)(struct nvbios *, uint16_t) = NULL;
 	int i;
 
-	if (bios->bdcb.version >= 0x40) {
+	if (bios->dcb.version >= 0x40) {
 		if (gpio_table_ptr && entry_len != 4) {
 			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
 			return;
@@ -5116,7 +5118,7 @@
 
 		parse_entry = parse_dcb40_gpio_entry;
 
-	} else if (bios->bdcb.version >= 0x30) {
+	} else if (bios->dcb.version >= 0x30) {
 		if (gpio_table_ptr && entry_len != 2) {
 			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
 			return;
@@ -5124,7 +5126,7 @@
 
 		parse_entry = parse_dcb30_gpio_entry;
 
-	} else if (bios->bdcb.version >= 0x22) {
+	} else if (bios->dcb.version >= 0x22) {
 		/*
 		 * DCBs older than v3.0 don't really have a GPIO
 		 * table, instead they keep some GPIO info at fixed
@@ -5158,30 +5160,67 @@
 nouveau_bios_connector_entry(struct drm_device *dev, int index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct dcb_connector_table_entry *cte;
 
-	if (index >= bios->bdcb.connector.entries)
+	if (index >= bios->dcb.connector.entries)
 		return NULL;
 
-	cte = &bios->bdcb.connector.entry[index];
+	cte = &bios->dcb.connector.entry[index];
 	if (cte->type == 0xff)
 		return NULL;
 
 	return cte;
 }
 
+static enum dcb_connector_type
+divine_connector_type(struct nvbios *bios, int index)
+{
+	struct dcb_table *dcb = &bios->dcb;
+	unsigned encoders = 0, type = DCB_CONNECTOR_NONE;
+	int i;
+
+	for (i = 0; i < dcb->entries; i++) {
+		if (dcb->entry[i].connector == index)
+			encoders |= (1 << dcb->entry[i].type);
+	}
+
+	if (encoders & (1 << OUTPUT_DP)) {
+		if (encoders & (1 << OUTPUT_TMDS))
+			type = DCB_CONNECTOR_DP;
+		else
+			type = DCB_CONNECTOR_eDP;
+	} else
+	if (encoders & (1 << OUTPUT_TMDS)) {
+		if (encoders & (1 << OUTPUT_ANALOG))
+			type = DCB_CONNECTOR_DVI_I;
+		else
+			type = DCB_CONNECTOR_DVI_D;
+	} else
+	if (encoders & (1 << OUTPUT_ANALOG)) {
+		type = DCB_CONNECTOR_VGA;
+	} else
+	if (encoders & (1 << OUTPUT_LVDS)) {
+		type = DCB_CONNECTOR_LVDS;
+	} else
+	if (encoders & (1 << OUTPUT_TV)) {
+		type = DCB_CONNECTOR_TV_0;
+	}
+
+	return type;
+}
+
 static void
 parse_dcb_connector_table(struct nvbios *bios)
 {
 	struct drm_device *dev = bios->dev;
-	struct dcb_connector_table *ct = &bios->bdcb.connector;
+	struct dcb_connector_table *ct = &bios->dcb.connector;
 	struct dcb_connector_table_entry *cte;
-	uint8_t *conntab = &bios->data[bios->bdcb.connector_table_ptr];
+	uint8_t *conntab = &bios->data[bios->dcb.connector_table_ptr];
 	uint8_t *entry;
 	int i;
 
-	if (!bios->bdcb.connector_table_ptr) {
+	if (!bios->dcb.connector_table_ptr) {
 		NV_DEBUG_KMS(dev, "No DCB connector table present\n");
 		return;
 	}
@@ -5203,6 +5242,7 @@
 			cte->entry = ROM16(entry[0]);
 		else
 			cte->entry = ROM32(entry[0]);
+
 		cte->type  = (cte->entry & 0x000000ff) >> 0;
 		cte->index = (cte->entry & 0x00000f00) >> 8;
 		switch (cte->entry & 0x00033000) {
@@ -5228,10 +5268,33 @@
 
 		NV_INFO(dev, "  %d: 0x%08x: type 0x%02x idx %d tag 0x%02x\n",
 			i, cte->entry, cte->type, cte->index, cte->gpio_tag);
+
+		/* check for known types, fallback to guessing the type
+		 * from attached encoders if we hit an unknown.
+		 */
+		switch (cte->type) {
+		case DCB_CONNECTOR_VGA:
+		case DCB_CONNECTOR_TV_0:
+		case DCB_CONNECTOR_TV_1:
+		case DCB_CONNECTOR_TV_3:
+		case DCB_CONNECTOR_DVI_I:
+		case DCB_CONNECTOR_DVI_D:
+		case DCB_CONNECTOR_LVDS:
+		case DCB_CONNECTOR_DP:
+		case DCB_CONNECTOR_eDP:
+		case DCB_CONNECTOR_HDMI_0:
+		case DCB_CONNECTOR_HDMI_1:
+			break;
+		default:
+			cte->type = divine_connector_type(bios, cte->index);
+			NV_WARN(dev, "unknown type, using 0x%02x", cte->type);
+			break;
+		}
+
 	}
 }
 
-static struct dcb_entry *new_dcb_entry(struct parsed_dcb *dcb)
+static struct dcb_entry *new_dcb_entry(struct dcb_table *dcb)
 {
 	struct dcb_entry *entry = &dcb->entry[dcb->entries];
 
@@ -5241,7 +5304,7 @@
 	return entry;
 }
 
-static void fabricate_vga_output(struct parsed_dcb *dcb, int i2c, int heads)
+static void fabricate_vga_output(struct dcb_table *dcb, int i2c, int heads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5252,7 +5315,7 @@
 	/* "or" mostly unused in early gen crt modesetting, 0 is fine */
 }
 
-static void fabricate_dvi_i_output(struct parsed_dcb *dcb, bool twoHeads)
+static void fabricate_dvi_i_output(struct dcb_table *dcb, bool twoHeads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5279,7 +5342,7 @@
 #endif
 }
 
-static void fabricate_tv_output(struct parsed_dcb *dcb, bool twoHeads)
+static void fabricate_tv_output(struct dcb_table *dcb, bool twoHeads)
 {
 	struct dcb_entry *entry = new_dcb_entry(dcb);
 
@@ -5290,13 +5353,13 @@
 }
 
 static bool
-parse_dcb20_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
+parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
 		  uint32_t conn, uint32_t conf, struct dcb_entry *entry)
 {
 	entry->type = conn & 0xf;
 	entry->i2c_index = (conn >> 4) & 0xf;
 	entry->heads = (conn >> 8) & 0xf;
-	if (bdcb->version >= 0x40)
+	if (dcb->version >= 0x40)
 		entry->connector = (conn >> 12) & 0xf;
 	entry->bus = (conn >> 16) & 0xf;
 	entry->location = (conn >> 20) & 0x3;
@@ -5314,7 +5377,7 @@
 		 * Although the rest of a CRT conf dword is usually
 		 * zeros, mac biosen have stuff there so we must mask
 		 */
-		entry->crtconf.maxfreq = (bdcb->version < 0x30) ?
+		entry->crtconf.maxfreq = (dcb->version < 0x30) ?
 					 (conf & 0xffff) * 10 :
 					 (conf & 0xff) * 10000;
 		break;
@@ -5323,7 +5386,7 @@
 		uint32_t mask;
 		if (conf & 0x1)
 			entry->lvdsconf.use_straps_for_mode = true;
-		if (bdcb->version < 0x22) {
+		if (dcb->version < 0x22) {
 			mask = ~0xd;
 			/*
 			 * The laptop in bug 14567 lies and claims to not use
@@ -5347,7 +5410,7 @@
 			 * Until we even try to use these on G8x, it's
 			 * useless reporting unknown bits.  They all are.
 			 */
-			if (bdcb->version >= 0x40)
+			if (dcb->version >= 0x40)
 				break;
 
 			NV_ERROR(dev, "Unknown LVDS configuration bits, "
@@ -5357,7 +5420,7 @@
 		}
 	case OUTPUT_TV:
 	{
-		if (bdcb->version >= 0x30)
+		if (dcb->version >= 0x30)
 			entry->tvconf.has_component_output = conf & (0x8 << 4);
 		else
 			entry->tvconf.has_component_output = false;
@@ -5384,8 +5447,10 @@
 		break;
 	case 0xe:
 		/* weird g80 mobile type that "nv" treats as a terminator */
-		bdcb->dcb.entries--;
+		dcb->entries--;
 		return false;
+	default:
+		break;
 	}
 
 	/* unsure what DCB version introduces this, 3.0? */
@@ -5396,7 +5461,7 @@
 }
 
 static bool
-parse_dcb15_entry(struct drm_device *dev, struct parsed_dcb *dcb,
+parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
 		  uint32_t conn, uint32_t conf, struct dcb_entry *entry)
 {
 	switch (conn & 0x0000000f) {
@@ -5462,27 +5527,27 @@
 	return true;
 }
 
-static bool parse_dcb_entry(struct drm_device *dev, struct bios_parsed_dcb *bdcb,
+static bool parse_dcb_entry(struct drm_device *dev, struct dcb_table *dcb,
 			    uint32_t conn, uint32_t conf)
 {
-	struct dcb_entry *entry = new_dcb_entry(&bdcb->dcb);
+	struct dcb_entry *entry = new_dcb_entry(dcb);
 	bool ret;
 
-	if (bdcb->version >= 0x20)
-		ret = parse_dcb20_entry(dev, bdcb, conn, conf, entry);
+	if (dcb->version >= 0x20)
+		ret = parse_dcb20_entry(dev, dcb, conn, conf, entry);
 	else
-		ret = parse_dcb15_entry(dev, &bdcb->dcb, conn, conf, entry);
+		ret = parse_dcb15_entry(dev, dcb, conn, conf, entry);
 	if (!ret)
 		return ret;
 
-	read_dcb_i2c_entry(dev, bdcb->version, bdcb->i2c_table,
-			   entry->i2c_index, &bdcb->dcb.i2c[entry->i2c_index]);
+	read_dcb_i2c_entry(dev, dcb->version, dcb->i2c_table,
+			   entry->i2c_index, &dcb->i2c[entry->i2c_index]);
 
 	return true;
 }
 
 static
-void merge_like_dcb_entries(struct drm_device *dev, struct parsed_dcb *dcb)
+void merge_like_dcb_entries(struct drm_device *dev, struct dcb_table *dcb)
 {
 	/*
 	 * DCB v2.0 lists each output combination separately.
@@ -5534,8 +5599,7 @@
 parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct bios_parsed_dcb *bdcb = &bios->bdcb;
-	struct parsed_dcb *dcb;
+	struct dcb_table *dcb = &bios->dcb;
 	uint16_t dcbptr = 0, i2ctabptr = 0;
 	uint8_t *dcbtable;
 	uint8_t headerlen = 0x4, entries = DCB_MAX_NUM_ENTRIES;
@@ -5543,9 +5607,6 @@
 	int recordlength = 8, confofs = 4;
 	int i;
 
-	dcb = bios->pub.dcb = &bdcb->dcb;
-	dcb->entries = 0;
-
 	/* get the offset from 0x36 */
 	if (dev_priv->card_type > NV_04) {
 		dcbptr = ROM16(bios->data[0x36]);
@@ -5567,21 +5628,21 @@
 	dcbtable = &bios->data[dcbptr];
 
 	/* get DCB version */
-	bdcb->version = dcbtable[0];
+	dcb->version = dcbtable[0];
 	NV_TRACE(dev, "Found Display Configuration Block version %d.%d\n",
-		 bdcb->version >> 4, bdcb->version & 0xf);
+		 dcb->version >> 4, dcb->version & 0xf);
 
-	if (bdcb->version >= 0x20) { /* NV17+ */
+	if (dcb->version >= 0x20) { /* NV17+ */
 		uint32_t sig;
 
-		if (bdcb->version >= 0x30) { /* NV40+ */
+		if (dcb->version >= 0x30) { /* NV40+ */
 			headerlen = dcbtable[1];
 			entries = dcbtable[2];
 			recordlength = dcbtable[3];
 			i2ctabptr = ROM16(dcbtable[4]);
 			sig = ROM32(dcbtable[6]);
-			bdcb->gpio_table_ptr = ROM16(dcbtable[10]);
-			bdcb->connector_table_ptr = ROM16(dcbtable[20]);
+			dcb->gpio_table_ptr = ROM16(dcbtable[10]);
+			dcb->connector_table_ptr = ROM16(dcbtable[20]);
 		} else {
 			i2ctabptr = ROM16(dcbtable[2]);
 			sig = ROM32(dcbtable[4]);
@@ -5593,7 +5654,7 @@
 					"signature (%08X)\n", sig);
 			return -EINVAL;
 		}
-	} else if (bdcb->version >= 0x15) { /* some NV11 and NV20 */
+	} else if (dcb->version >= 0x15) { /* some NV11 and NV20 */
 		char sig[8] = { 0 };
 
 		strncpy(sig, (char *)&dcbtable[-7], 7);
@@ -5641,14 +5702,11 @@
 	if (!i2ctabptr)
 		NV_WARN(dev, "No pointer to DCB I2C port table\n");
 	else {
-		bdcb->i2c_table = &bios->data[i2ctabptr];
-		if (bdcb->version >= 0x30)
-			bdcb->i2c_default_indices = bdcb->i2c_table[4];
+		dcb->i2c_table = &bios->data[i2ctabptr];
+		if (dcb->version >= 0x30)
+			dcb->i2c_default_indices = dcb->i2c_table[4];
 	}
 
-	parse_dcb_gpio_table(bios);
-	parse_dcb_connector_table(bios);
-
 	if (entries > DCB_MAX_NUM_ENTRIES)
 		entries = DCB_MAX_NUM_ENTRIES;
 
@@ -5673,7 +5731,7 @@
 		NV_TRACEWARN(dev, "Raw DCB entry %d: %08x %08x\n",
 			     dcb->entries, connection, config);
 
-		if (!parse_dcb_entry(dev, bdcb, connection, config))
+		if (!parse_dcb_entry(dev, dcb, connection, config))
 			break;
 	}
 
@@ -5681,18 +5739,22 @@
 	 * apart for v2.1+ not being known for requiring merging, this
 	 * guarantees dcbent->index is the index of the entry in the rom image
 	 */
-	if (bdcb->version < 0x21)
+	if (dcb->version < 0x21)
 		merge_like_dcb_entries(dev, dcb);
 
-	return dcb->entries ? 0 : -ENXIO;
+	if (!dcb->entries)
+		return -ENXIO;
+
+	parse_dcb_gpio_table(bios);
+	parse_dcb_connector_table(bios);
+	return 0;
 }
 
 static void
 fixup_legacy_connector(struct nvbios *bios)
 {
-	struct bios_parsed_dcb *bdcb = &bios->bdcb;
-	struct parsed_dcb *dcb = &bdcb->dcb;
-	int high = 0, i;
+	struct dcb_table *dcb = &bios->dcb;
+	int i, i2c, i2c_conn[DCB_MAX_NUM_I2C_ENTRIES] = { };
 
 	/*
 	 * DCB 3.0 also has the table in most cases, but there are some cards
@@ -5700,9 +5762,11 @@
 	 * indices are all 0.  We don't need the connector indices on pre-G80
 	 * chips (yet?) so limit the use to DCB 4.0 and above.
 	 */
-	if (bdcb->version >= 0x40)
+	if (dcb->version >= 0x40)
 		return;
 
+	dcb->connector.entries = 0;
+
 	/*
 	 * No known connector info before v3.0, so make it up.  the rule here
 	 * is: anything on the same i2c bus is considered to be on the same
@@ -5710,37 +5774,38 @@
 	 * its own unique connector index.
 	 */
 	for (i = 0; i < dcb->entries; i++) {
-		if (dcb->entry[i].i2c_index == 0xf)
-			continue;
-
 		/*
 		 * Ignore the I2C index for on-chip TV-out, as there
 		 * are cards with bogus values (nv31m in bug 23212),
 		 * and it's otherwise useless.
 		 */
 		if (dcb->entry[i].type == OUTPUT_TV &&
-		    dcb->entry[i].location == DCB_LOC_ON_CHIP) {
+		    dcb->entry[i].location == DCB_LOC_ON_CHIP)
 			dcb->entry[i].i2c_index = 0xf;
+		i2c = dcb->entry[i].i2c_index;
+
+		if (i2c_conn[i2c]) {
+			dcb->entry[i].connector = i2c_conn[i2c] - 1;
 			continue;
 		}
 
-		dcb->entry[i].connector = dcb->entry[i].i2c_index;
-		if (dcb->entry[i].connector > high)
-			high = dcb->entry[i].connector;
+		dcb->entry[i].connector = dcb->connector.entries++;
+		if (i2c != 0xf)
+			i2c_conn[i2c] = dcb->connector.entries;
 	}
 
-	for (i = 0; i < dcb->entries; i++) {
-		if (dcb->entry[i].i2c_index != 0xf)
-			continue;
-
-		dcb->entry[i].connector = ++high;
+	/* Fake the connector table as well as just connector indices */
+	for (i = 0; i < dcb->connector.entries; i++) {
+		dcb->connector.entry[i].index = i;
+		dcb->connector.entry[i].type = divine_connector_type(bios, i);
+		dcb->connector.entry[i].gpio_tag = 0xff;
 	}
 }
 
 static void
 fixup_legacy_i2c(struct nvbios *bios)
 {
-	struct parsed_dcb *dcb = &bios->bdcb.dcb;
+	struct dcb_table *dcb = &bios->dcb;
 	int i;
 
 	for (i = 0; i < dcb->entries; i++) {
@@ -5826,7 +5891,7 @@
 uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	const uint8_t edid_sig[] = {
 			0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
 	uint16_t offset = 0;
@@ -5859,7 +5924,7 @@
 			    struct dcb_entry *dcbent)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct init_exec iexec = { true, false };
 
 	mutex_lock(&bios->lock);
@@ -5872,7 +5937,7 @@
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
 	memset(bios, 0, sizeof(struct nvbios));
 	mutex_init(&bios->lock);
@@ -5888,7 +5953,7 @@
 static int nouveau_parse_vbios_struct(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	const uint8_t bit_signature[] = { 0xff, 0xb8, 'B', 'I', 'T' };
 	const uint8_t bmp_signature[] = { 0xff, 0x7f, 'N', 'V', 0x0 };
 	int offset;
@@ -5915,7 +5980,7 @@
 nouveau_run_vbios_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	int i, ret = 0;
 
 	NVLockVgaCrtcs(dev, false);
@@ -5946,9 +6011,9 @@
 	}
 
 	if (dev_priv->card_type >= NV_50) {
-		for (i = 0; i < bios->bdcb.dcb.entries; i++) {
+		for (i = 0; i < bios->dcb.entries; i++) {
 			nouveau_bios_run_display_table(dev,
-						       &bios->bdcb.dcb.entry[i],
+						       &bios->dcb.entry[i],
 						       0, 0);
 		}
 	}
@@ -5962,11 +6027,11 @@
 nouveau_bios_i2c_devices_takedown(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	struct dcb_i2c_entry *entry;
 	int i;
 
-	entry = &bios->bdcb.dcb.i2c[0];
+	entry = &bios->dcb.i2c[0];
 	for (i = 0; i < DCB_MAX_NUM_I2C_ENTRIES; i++, entry++)
 		nouveau_i2c_fini(dev, entry);
 }
@@ -5975,13 +6040,11 @@
 nouveau_bios_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t saved_nv_pextdev_boot_0;
 	bool was_locked;
 	int ret;
 
-	dev_priv->vbios = &bios->pub;
-
 	if (!NVInitVBIOS(dev))
 		return -ENODEV;
 
@@ -6023,10 +6086,8 @@
 	bios_wr32(bios, NV_PEXTDEV_BOOT_0, saved_nv_pextdev_boot_0);
 
 	ret = nouveau_run_vbios_init(dev);
-	if (ret) {
-		dev_priv->vbios = NULL;
+	if (ret)
 		return ret;
-	}
 
 	/* feature_byte on BMP is poor, but init always sets CR4B */
 	was_locked = NVLockVgaCrtcs(dev, false);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index fd94bd6..9f688aa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -34,9 +34,67 @@
 
 #define DCB_LOC_ON_CHIP 0
 
+struct dcb_i2c_entry {
+	uint8_t port_type;
+	uint8_t read, write;
+	struct nouveau_i2c_chan *chan;
+};
+
+enum dcb_gpio_tag {
+	DCB_GPIO_TVDAC0 = 0xc,
+	DCB_GPIO_TVDAC1 = 0x2d,
+};
+
+struct dcb_gpio_entry {
+	enum dcb_gpio_tag tag;
+	int line;
+	bool invert;
+};
+
+struct dcb_gpio_table {
+	int entries;
+	struct dcb_gpio_entry entry[DCB_MAX_NUM_GPIO_ENTRIES];
+};
+
+enum dcb_connector_type {
+	DCB_CONNECTOR_VGA = 0x00,
+	DCB_CONNECTOR_TV_0 = 0x10,
+	DCB_CONNECTOR_TV_1 = 0x11,
+	DCB_CONNECTOR_TV_3 = 0x13,
+	DCB_CONNECTOR_DVI_I = 0x30,
+	DCB_CONNECTOR_DVI_D = 0x31,
+	DCB_CONNECTOR_LVDS = 0x40,
+	DCB_CONNECTOR_DP = 0x46,
+	DCB_CONNECTOR_eDP = 0x47,
+	DCB_CONNECTOR_HDMI_0 = 0x60,
+	DCB_CONNECTOR_HDMI_1 = 0x61,
+	DCB_CONNECTOR_NONE = 0xff
+};
+
+struct dcb_connector_table_entry {
+	uint32_t entry;
+	enum dcb_connector_type type;
+	uint8_t index;
+	uint8_t gpio_tag;
+};
+
+struct dcb_connector_table {
+	int entries;
+	struct dcb_connector_table_entry entry[DCB_MAX_NUM_CONNECTOR_ENTRIES];
+};
+
+enum dcb_type {
+	OUTPUT_ANALOG = 0,
+	OUTPUT_TV = 1,
+	OUTPUT_TMDS = 2,
+	OUTPUT_LVDS = 3,
+	OUTPUT_DP = 6,
+	OUTPUT_ANY = -1
+};
+
 struct dcb_entry {
 	int index;	/* may not be raw dcb index if merging has happened */
-	uint8_t type;
+	enum dcb_type type;
 	uint8_t i2c_index;
 	uint8_t heads;
 	uint8_t connector;
@@ -71,69 +129,22 @@
 	bool i2c_upper_default;
 };
 
-struct dcb_i2c_entry {
-	uint8_t port_type;
-	uint8_t read, write;
-	struct nouveau_i2c_chan *chan;
-};
-
-struct parsed_dcb {
-	int entries;
-	struct dcb_entry entry[DCB_MAX_NUM_ENTRIES];
-	struct dcb_i2c_entry i2c[DCB_MAX_NUM_I2C_ENTRIES];
-};
-
-enum dcb_gpio_tag {
-	DCB_GPIO_TVDAC0 = 0xc,
-	DCB_GPIO_TVDAC1 = 0x2d,
-};
-
-struct dcb_gpio_entry {
-	enum dcb_gpio_tag tag;
-	int line;
-	bool invert;
-};
-
-struct parsed_dcb_gpio {
-	int entries;
-	struct dcb_gpio_entry entry[DCB_MAX_NUM_GPIO_ENTRIES];
-};
-
-struct dcb_connector_table_entry {
-	uint32_t entry;
-	uint8_t type;
-	uint8_t index;
-	uint8_t gpio_tag;
-};
-
-struct dcb_connector_table {
-	int entries;
-	struct dcb_connector_table_entry entry[DCB_MAX_NUM_CONNECTOR_ENTRIES];
-};
-
-struct bios_parsed_dcb {
+struct dcb_table {
 	uint8_t version;
 
-	struct parsed_dcb dcb;
+	int entries;
+	struct dcb_entry entry[DCB_MAX_NUM_ENTRIES];
 
 	uint8_t *i2c_table;
 	uint8_t i2c_default_indices;
+	struct dcb_i2c_entry i2c[DCB_MAX_NUM_I2C_ENTRIES];
 
 	uint16_t gpio_table_ptr;
-	struct parsed_dcb_gpio gpio;
+	struct dcb_gpio_table gpio;
 	uint16_t connector_table_ptr;
 	struct dcb_connector_table connector;
 };
 
-enum nouveau_encoder_type {
-	OUTPUT_ANALOG = 0,
-	OUTPUT_TV = 1,
-	OUTPUT_TMDS = 2,
-	OUTPUT_LVDS = 3,
-	OUTPUT_DP = 6,
-	OUTPUT_ANY = -1
-};
-
 enum nouveau_or {
 	OUTPUT_A = (1 << 0),
 	OUTPUT_B = (1 << 1),
@@ -190,8 +201,8 @@
 	int refclk;
 };
 
-struct nouveau_bios_info {
-	struct parsed_dcb *dcb;
+struct nvbios {
+	struct drm_device *dev;
 
 	uint8_t chip_version;
 
@@ -199,11 +210,6 @@
 	uint32_t tvdactestval;
 	uint8_t digital_min_front_porch;
 	bool fp_no_ddc;
-};
-
-struct nvbios {
-	struct drm_device *dev;
-	struct nouveau_bios_info pub;
 
 	struct mutex lock;
 
@@ -234,7 +240,7 @@
 	uint16_t some_script_ptr; /* BIT I + 14 */
 	uint16_t init96_tbl_ptr; /* BIT I + 16 */
 
-	struct bios_parsed_dcb bdcb;
+	struct dcb_table dcb;
 
 	struct {
 		int crtchead;
diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c
index ee2b845..88f9bc0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_calc.c
+++ b/drivers/gpu/drm/nouveau/nouveau_calc.c
@@ -274,7 +274,7 @@
 	 * returns calculated clock
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int cv = dev_priv->vbios->chip_version;
+	int cv = dev_priv->vbios.chip_version;
 	int minvco = pll_lim->vco1.minfreq, maxvco = pll_lim->vco1.maxfreq;
 	int minM = pll_lim->vco1.min_m, maxM = pll_lim->vco1.max_m;
 	int minN = pll_lim->vco1.min_n, maxN = pll_lim->vco1.max_n;
@@ -373,7 +373,7 @@
 	 * returns calculated clock
 	 */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	int minvco1 = pll_lim->vco1.minfreq, maxvco1 = pll_lim->vco1.maxfreq;
 	int minvco2 = pll_lim->vco2.minfreq, maxvco2 = pll_lim->vco2.maxfreq;
 	int minU1 = pll_lim->vco1.min_inputfreq, minU2 = pll_lim->vco2.min_inputfreq;
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 2281f99..6dfb425 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -35,22 +35,27 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *pb = chan->pushbuf_bo;
 	struct nouveau_gpuobj *pushbuf = NULL;
-	uint32_t start = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	int ret;
 
+	if (dev_priv->card_type >= NV_50) {
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0,
+					     dev_priv->vm_end, NV_DMA_ACCESS_RO,
+					     NV_DMA_TARGET_AGP, &pushbuf);
+		chan->pushbuf_base = pb->bo.offset;
+	} else
 	if (pb->bo.mem.mem_type == TTM_PL_TT) {
 		ret = nouveau_gpuobj_gart_dma_new(chan, 0,
 						  dev_priv->gart_info.aper_size,
 						  NV_DMA_ACCESS_RO, &pushbuf,
 						  NULL);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	} else
 	if (dev_priv->card_type != NV_04) {
 		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0,
 					     dev_priv->fb_available_size,
 					     NV_DMA_ACCESS_RO,
 					     NV_DMA_TARGET_VIDMEM, &pushbuf);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	} else {
 		/* NV04 cmdbuf hack, from original ddx.. not sure of it's
 		 * exact reason for existing :)  PCI access to cmdbuf in
@@ -61,7 +66,7 @@
 					     dev_priv->fb_available_size,
 					     NV_DMA_ACCESS_RO,
 					     NV_DMA_TARGET_PCI, &pushbuf);
-		chan->pushbuf_base = start;
+		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
 	}
 
 	ret = nouveau_gpuobj_ref_add(dev, chan, 0, pushbuf, &chan->pushbuf);
@@ -275,9 +280,18 @@
 	 */
 	nouveau_fence_fini(chan);
 
-	/* Ensure the channel is no longer active on the GPU */
+	/* This will prevent pfifo from switching channels. */
 	pfifo->reassign(dev, false);
 
+	/* We want to give pgraph a chance to idle and get rid of all potential
+	 * errors. We need to do this before the lock, otherwise the irq handler
+	 * is unable to process them.
+	 */
+	if (pgraph->channel(dev) == chan)
+		nouveau_wait_for_idle(dev);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	pgraph->fifo_access(dev, false);
 	if (pgraph->channel(dev) == chan)
 		pgraph->unload_context(dev);
@@ -293,6 +307,8 @@
 
 	pfifo->reassign(dev, true);
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
 	/* Release the channel's resources */
 	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
 	if (chan->pushbuf_bo) {
@@ -369,6 +385,14 @@
 		return ret;
 	init->channel  = chan->id;
 
+	if (chan->dma.ib_max)
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
+					NOUVEAU_GEM_DOMAIN_GART;
+	else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
+	else
+		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+
 	init->subchan[0].handle = NvM2MF;
 	if (dev_priv->card_type < NV_50)
 		init->subchan[0].grclass = 0x0039;
@@ -408,7 +432,6 @@
  ***********************************/
 
 struct drm_ioctl_desc nouveau_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_NOUVEAU_CARD_INIT, nouveau_ioctl_card_init, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH),
@@ -418,13 +441,9 @@
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL, nouveau_gem_ioctl_pushbuf_call, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PIN, nouveau_gem_ioctl_pin, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_UNPIN, nouveau_gem_ioctl_unpin, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL2, nouveau_gem_ioctl_pushbuf_call2, DRM_AUTH),
 };
 
 int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index d2f6335..24327f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -218,7 +218,7 @@
 			connector->interlace_allowed = true;
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+	if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 		drm_connector_property_set_value(connector,
 			dev->mode_config.dvi_i_subconnector_property,
 			nv_encoder->dcb->type == OUTPUT_TMDS ?
@@ -236,15 +236,17 @@
 	struct nouveau_i2c_chan *i2c;
 	int type, flags;
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+	if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS)
 		nv_encoder = find_encoder_by_type(connector, OUTPUT_LVDS);
 	if (nv_encoder && nv_connector->native_mode) {
+		unsigned status = connector_status_connected;
+
 #ifdef CONFIG_ACPI
 		if (!nouveau_ignorelid && !acpi_lid_open())
-			return connector_status_disconnected;
+			status = connector_status_unknown;
 #endif
 		nouveau_connector_set_encoder(connector, nv_encoder);
-		return connector_status_connected;
+		return status;
 	}
 
 	/* Cleanup the previous EDID block. */
@@ -279,7 +281,7 @@
 		 * same i2c channel so the value returned from ddc_detect
 		 * isn't necessarily correct.
 		 */
-		if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+		if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 			if (nv_connector->edid->input & DRM_EDID_INPUT_DIGITAL)
 				type = OUTPUT_TMDS;
 			else
@@ -321,11 +323,11 @@
 static void
 nouveau_connector_force(struct drm_connector *connector)
 {
-	struct drm_device *dev = connector->dev;
+	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_encoder *nv_encoder;
 	int type;
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_DVII) {
+	if (nv_connector->dcb->type == DCB_CONNECTOR_DVI_I) {
 		if (connector->force == DRM_FORCE_ON_DIGITAL)
 			type = OUTPUT_TMDS;
 		else
@@ -335,7 +337,7 @@
 
 	nv_encoder = find_encoder_by_type(connector, type);
 	if (!nv_encoder) {
-		NV_ERROR(dev, "can't find encoder to force %s on!\n",
+		NV_ERROR(connector->dev, "can't find encoder to force %s on!\n",
 			 drm_get_connector_name(connector));
 		connector->status = connector_status_disconnected;
 		return;
@@ -369,7 +371,7 @@
 		}
 
 		/* LVDS always needs gpu scaling */
-		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS &&
+		if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS &&
 		    value == DRM_MODE_SCALE_NONE)
 			return -EINVAL;
 
@@ -535,7 +537,7 @@
 	/* If we're not LVDS, destroy the previous native mode, the attached
 	 * monitor could have changed.
 	 */
-	if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
+	if (nv_connector->dcb->type != DCB_CONNECTOR_LVDS &&
 	    nv_connector->native_mode) {
 		drm_mode_destroy(dev, nv_connector->native_mode);
 		nv_connector->native_mode = NULL;
@@ -563,7 +565,7 @@
 		ret = get_slave_funcs(nv_encoder)->
 			get_modes(to_drm_encoder(nv_encoder), connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
+	if (nv_encoder->dcb->type == OUTPUT_LVDS)
 		ret += nouveau_connector_scaler_modes_add(connector);
 
 	return ret;
@@ -613,6 +615,9 @@
 
 		clock *= 3;
 		break;
+	default:
+		BUG_ON(1);
+		return MODE_BAD;
 	}
 
 	if (clock < min_clock)
@@ -680,7 +685,7 @@
 	/* Firstly try getting EDID over DDC, if allowed and I2C channel
 	 * is available.
 	 */
-	if (!dev_priv->VBIOS.pub.fp_no_ddc && nv_encoder->dcb->i2c_index < 0xf)
+	if (!dev_priv->vbios.fp_no_ddc && nv_encoder->dcb->i2c_index < 0xf)
 		i2c = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
 
 	if (i2c) {
@@ -695,7 +700,7 @@
 	 */
 	if (!nv_connector->edid && nouveau_bios_fp_mode(dev, &native) &&
 	     (nv_encoder->dcb->lvdsconf.use_straps_for_mode ||
-	      dev_priv->VBIOS.pub.fp_no_ddc)) {
+	      dev_priv->vbios.fp_no_ddc)) {
 		nv_connector->native_mode = drm_mode_duplicate(dev, &native);
 		goto out;
 	}
@@ -704,7 +709,7 @@
 	 * stored for the panel stored in them.
 	 */
 	if (!nv_connector->edid && !nv_connector->native_mode &&
-	    !dev_priv->VBIOS.pub.fp_no_ddc) {
+	    !dev_priv->vbios.fp_no_ddc) {
 		struct edid *edid =
 			(struct edid *)nouveau_bios_embedded_edid(dev);
 		if (edid) {
@@ -739,46 +744,66 @@
 }
 
 int
-nouveau_connector_create(struct drm_device *dev, int index, int type)
+nouveau_connector_create(struct drm_device *dev,
+			 struct dcb_connector_table_entry *dcb)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_connector *nv_connector = NULL;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
-	int ret;
+	int ret, type;
 
 	NV_DEBUG_KMS(dev, "\n");
 
+	switch (dcb->type) {
+	case DCB_CONNECTOR_NONE:
+		return 0;
+	case DCB_CONNECTOR_VGA:
+		NV_INFO(dev, "Detected a VGA connector\n");
+		type = DRM_MODE_CONNECTOR_VGA;
+		break;
+	case DCB_CONNECTOR_TV_0:
+	case DCB_CONNECTOR_TV_1:
+	case DCB_CONNECTOR_TV_3:
+		NV_INFO(dev, "Detected a TV connector\n");
+		type = DRM_MODE_CONNECTOR_TV;
+		break;
+	case DCB_CONNECTOR_DVI_I:
+		NV_INFO(dev, "Detected a DVI-I connector\n");
+		type = DRM_MODE_CONNECTOR_DVII;
+		break;
+	case DCB_CONNECTOR_DVI_D:
+		NV_INFO(dev, "Detected a DVI-D connector\n");
+		type = DRM_MODE_CONNECTOR_DVID;
+		break;
+	case DCB_CONNECTOR_HDMI_0:
+	case DCB_CONNECTOR_HDMI_1:
+		NV_INFO(dev, "Detected a HDMI connector\n");
+		type = DRM_MODE_CONNECTOR_HDMIA;
+		break;
+	case DCB_CONNECTOR_LVDS:
+		NV_INFO(dev, "Detected a LVDS connector\n");
+		type = DRM_MODE_CONNECTOR_LVDS;
+		break;
+	case DCB_CONNECTOR_DP:
+		NV_INFO(dev, "Detected a DisplayPort connector\n");
+		type = DRM_MODE_CONNECTOR_DisplayPort;
+		break;
+	case DCB_CONNECTOR_eDP:
+		NV_INFO(dev, "Detected an eDP connector\n");
+		type = DRM_MODE_CONNECTOR_eDP;
+		break;
+	default:
+		NV_ERROR(dev, "unknown connector type: 0x%02x!!\n", dcb->type);
+		return -EINVAL;
+	}
+
 	nv_connector = kzalloc(sizeof(*nv_connector), GFP_KERNEL);
 	if (!nv_connector)
 		return -ENOMEM;
-	nv_connector->dcb = nouveau_bios_connector_entry(dev, index);
+	nv_connector->dcb = dcb;
 	connector = &nv_connector->base;
 
-	switch (type) {
-	case DRM_MODE_CONNECTOR_VGA:
-		NV_INFO(dev, "Detected a VGA connector\n");
-		break;
-	case DRM_MODE_CONNECTOR_DVID:
-		NV_INFO(dev, "Detected a DVI-D connector\n");
-		break;
-	case DRM_MODE_CONNECTOR_DVII:
-		NV_INFO(dev, "Detected a DVI-I connector\n");
-		break;
-	case DRM_MODE_CONNECTOR_LVDS:
-		NV_INFO(dev, "Detected a LVDS connector\n");
-		break;
-	case DRM_MODE_CONNECTOR_TV:
-		NV_INFO(dev, "Detected a TV connector\n");
-		break;
-	case DRM_MODE_CONNECTOR_DisplayPort:
-		NV_INFO(dev, "Detected a DisplayPort connector\n");
-		break;
-	default:
-		NV_ERROR(dev, "Unknown connector, this is not good.\n");
-		break;
-	}
-
 	/* defaults, will get overridden in detect() */
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
@@ -786,44 +811,11 @@
 	drm_connector_init(dev, connector, &nouveau_connector_funcs, type);
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
 
-	/* Init DVI-I specific properties */
-	if (type == DRM_MODE_CONNECTOR_DVII) {
-		drm_mode_create_dvi_i_properties(dev);
-		drm_connector_attach_property(connector, dev->mode_config.dvi_i_subconnector_property, 0);
-		drm_connector_attach_property(connector, dev->mode_config.dvi_i_select_subconnector_property, 0);
-	}
-
-	if (type != DRM_MODE_CONNECTOR_LVDS)
-		nv_connector->use_dithering = false;
-
-	if (type == DRM_MODE_CONNECTOR_DVID ||
-	    type == DRM_MODE_CONNECTOR_DVII ||
-	    type == DRM_MODE_CONNECTOR_LVDS ||
-	    type == DRM_MODE_CONNECTOR_DisplayPort) {
-		nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
-
-		drm_connector_attach_property(connector, dev->mode_config.scaling_mode_property,
-					      nv_connector->scaling_mode);
-		drm_connector_attach_property(connector, dev->mode_config.dithering_mode_property,
-					      nv_connector->use_dithering ? DRM_MODE_DITHERING_ON
-					      : DRM_MODE_DITHERING_OFF);
-
-	} else {
-		nv_connector->scaling_mode = DRM_MODE_SCALE_NONE;
-
-		if (type == DRM_MODE_CONNECTOR_VGA  &&
-				dev_priv->card_type >= NV_50) {
-			drm_connector_attach_property(connector,
-					dev->mode_config.scaling_mode_property,
-					nv_connector->scaling_mode);
-		}
-	}
-
 	/* attach encoders */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 
-		if (nv_encoder->dcb->connector != index)
+		if (nv_encoder->dcb->connector != dcb->index)
 			continue;
 
 		if (get_slave_funcs(nv_encoder))
@@ -832,9 +824,52 @@
 		drm_mode_connector_attach_encoder(connector, encoder);
 	}
 
+	if (!connector->encoder_ids[0]) {
+		NV_WARN(dev, "  no encoders, ignoring\n");
+		drm_connector_cleanup(connector);
+		kfree(connector);
+		return 0;
+	}
+
+	/* Init DVI-I specific properties */
+	if (dcb->type == DCB_CONNECTOR_DVI_I) {
+		drm_mode_create_dvi_i_properties(dev);
+		drm_connector_attach_property(connector, dev->mode_config.dvi_i_subconnector_property, 0);
+		drm_connector_attach_property(connector, dev->mode_config.dvi_i_select_subconnector_property, 0);
+	}
+
+	if (dcb->type != DCB_CONNECTOR_LVDS)
+		nv_connector->use_dithering = false;
+
+	switch (dcb->type) {
+	case DCB_CONNECTOR_VGA:
+		if (dev_priv->card_type >= NV_50) {
+			drm_connector_attach_property(connector,
+					dev->mode_config.scaling_mode_property,
+					nv_connector->scaling_mode);
+		}
+		/* fall-through */
+	case DCB_CONNECTOR_TV_0:
+	case DCB_CONNECTOR_TV_1:
+	case DCB_CONNECTOR_TV_3:
+		nv_connector->scaling_mode = DRM_MODE_SCALE_NONE;
+		break;
+	default:
+		nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
+
+		drm_connector_attach_property(connector,
+				dev->mode_config.scaling_mode_property,
+				nv_connector->scaling_mode);
+		drm_connector_attach_property(connector,
+				dev->mode_config.dithering_mode_property,
+				nv_connector->use_dithering ?
+				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
+		break;
+	}
+
 	drm_sysfs_connector_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
+	if (dcb->type == DCB_CONNECTOR_LVDS) {
 		ret = nouveau_connector_create_lvds(dev, connector);
 		if (ret) {
 			connector->funcs->destroy(connector);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index 728b809..4ef38ab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -49,6 +49,7 @@
 	return container_of(con, struct nouveau_connector, base);
 }
 
-int nouveau_connector_create(struct drm_device *dev, int i2c_index, int type);
+int nouveau_connector_create(struct drm_device *,
+			     struct dcb_connector_table_entry *);
 
 #endif /* __NOUVEAU_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index d79db36..8ff9ef5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -47,12 +47,23 @@
 	seq_printf(m, "           cur: 0x%08x\n", chan->dma.cur << 2);
 	seq_printf(m, "           put: 0x%08x\n", chan->dma.put << 2);
 	seq_printf(m, "          free: 0x%08x\n", chan->dma.free << 2);
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib max: 0x%08x\n", chan->dma.ib_max);
+		seq_printf(m, "        ib put: 0x%08x\n", chan->dma.ib_put);
+		seq_printf(m, "       ib free: 0x%08x\n", chan->dma.ib_free);
+	}
 
 	seq_printf(m, "gpu fifo state:\n");
 	seq_printf(m, "           get: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_get));
 	seq_printf(m, "           put: 0x%08x\n",
 					nvchan_rd32(chan, chan->user_put));
+	if (chan->dma.ib_max) {
+		seq_printf(m, "        ib get: 0x%08x\n",
+			   nvchan_rd32(chan, 0x88));
+		seq_printf(m, "        ib put: 0x%08x\n",
+			   nvchan_rd32(chan, 0x8c));
+	}
 
 	seq_printf(m, "last fence    : %d\n", chan->fence.sequence);
 	seq_printf(m, "last signalled: %d\n", chan->fence.sequence_ack);
@@ -133,9 +144,22 @@
 	return 0;
 }
 
+static int
+nouveau_debugfs_vbios_image(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_nouveau_private *dev_priv = node->minor->dev->dev_private;
+	int i;
+
+	for (i = 0; i < dev_priv->vbios.length; i++)
+		seq_printf(m, "%c", dev_priv->vbios.data[i]);
+	return 0;
+}
+
 static struct drm_info_list nouveau_debugfs_list[] = {
 	{ "chipset", nouveau_debugfs_chipset_info, 0, NULL },
 	{ "memory", nouveau_debugfs_memory_info, 0, NULL },
+	{ "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL },
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index dfc9439..cf1c5c0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -39,11 +39,8 @@
 	if (drm_fb->fbdev)
 		nouveau_fbcon_remove(dev, drm_fb);
 
-	if (fb->nvbo) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(fb->nvbo->gem);
-		mutex_unlock(&dev->struct_mutex);
-	}
+	if (fb->nvbo)
+		drm_gem_object_unreference_unlocked(fb->nvbo->gem);
 
 	drm_framebuffer_cleanup(drm_fb);
 	kfree(fb);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 50d9e67..c8482a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -32,7 +32,22 @@
 void
 nouveau_dma_pre_init(struct nouveau_channel *chan)
 {
-	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_bo *pushbuf = chan->pushbuf_bo;
+
+	if (dev_priv->card_type == NV_50) {
+		const int ib_size = pushbuf->bo.mem.size / 2;
+
+		chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2;
+		chan->dma.ib_max = (ib_size / 8) - 1;
+		chan->dma.ib_put = 0;
+		chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put;
+
+		chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2;
+	} else {
+		chan->dma.max  = (pushbuf->bo.mem.size >> 2) - 2;
+	}
+
 	chan->dma.put  = 0;
 	chan->dma.cur  = chan->dma.put;
 	chan->dma.free = chan->dma.max - chan->dma.cur;
@@ -162,12 +177,101 @@
 	return (val - chan->pushbuf_base) >> 2;
 }
 
+void
+nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
+	      int delta, int length)
+{
+	struct nouveau_bo *pb = chan->pushbuf_bo;
+	uint64_t offset = bo->bo.offset + delta;
+	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
+
+	BUG_ON(chan->dma.ib_free < 1);
+	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
+	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
+
+	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
+	nvchan_wr32(chan, 0x8c, chan->dma.ib_put);
+	chan->dma.ib_free--;
+}
+
+static int
+nv50_dma_push_wait(struct nouveau_channel *chan, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+
+	while (chan->dma.ib_free < count) {
+		uint32_t get = nvchan_rd32(chan, 0x88);
+		if (get != prev_get) {
+			prev_get = get;
+			cnt = 0;
+		}
+
+		if ((++cnt & 0xff) == 0) {
+			DRM_UDELAY(1);
+			if (cnt > 100000)
+				return -EBUSY;
+		}
+
+		chan->dma.ib_free = get - chan->dma.ib_put;
+		if (chan->dma.ib_free <= 0)
+			chan->dma.ib_free += chan->dma.ib_max + 1;
+	}
+
+	return 0;
+}
+
+static int
+nv50_dma_wait(struct nouveau_channel *chan, int slots, int count)
+{
+	uint32_t cnt = 0, prev_get = 0;
+	int ret;
+
+	ret = nv50_dma_push_wait(chan, slots + 1);
+	if (unlikely(ret))
+		return ret;
+
+	while (chan->dma.free < count) {
+		int get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get < 0)) {
+			if (get == -EINVAL)
+				continue;
+
+			return get;
+		}
+
+		if (get <= chan->dma.cur) {
+			chan->dma.free = chan->dma.max - chan->dma.cur;
+			if (chan->dma.free >= count)
+				break;
+
+			FIRE_RING(chan);
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get < 0)) {
+					if (get == -EINVAL)
+						continue;
+					return get;
+				}
+			} while (get == 0);
+			chan->dma.cur = 0;
+			chan->dma.put = 0;
+		}
+
+		chan->dma.free = get - chan->dma.cur - 1;
+	}
+
+	return 0;
+}
+
 int
-nouveau_dma_wait(struct nouveau_channel *chan, int size)
+nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size)
 {
 	uint32_t prev_get = 0, cnt = 0;
 	int get;
 
+	if (chan->dma.ib_max)
+		return nv50_dma_wait(chan, slots, size);
+
 	while (chan->dma.free < size) {
 		get = READ_GET(chan, &prev_get, &cnt);
 		if (unlikely(get == -EBUSY))
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index dabfd65..8b05c15 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,6 +31,9 @@
 #define NOUVEAU_DMA_DEBUG 0
 #endif
 
+void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
+		   int delta, int length);
+
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
  * to avoid this we jump to offset + SKIPS and fill the difference with
@@ -96,13 +99,11 @@
 static __must_check inline int
 RING_SPACE(struct nouveau_channel *chan, int size)
 {
-	if (chan->dma.free < size) {
-		int ret;
+	int ret;
 
-		ret = nouveau_dma_wait(chan, size);
-		if (ret)
-			return ret;
-	}
+	ret = nouveau_dma_wait(chan, 1, size);
+	if (ret)
+		return ret;
 
 	chan->dma.free -= size;
 	return 0;
@@ -146,7 +147,13 @@
 		return;
 	chan->accel_done = true;
 
-	WRITE_PUT(chan->dma.cur);
+	if (chan->dma.ib_max) {
+		nv50_dma_push(chan, chan->pushbuf_bo, chan->dma.put << 2,
+			      (chan->dma.cur - chan->dma.put) << 2);
+	} else {
+		WRITE_PUT(chan->dma.cur);
+	}
+
 	chan->dma.put = chan->dma.cur;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index da3b93b..30cc09e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -75,11 +75,11 @@
 int nouveau_ignorelid = 0;
 module_param_named(ignorelid, nouveau_ignorelid, int, 0400);
 
-MODULE_PARM_DESC(noagp, "Disable all acceleration");
+MODULE_PARM_DESC(noaccel, "Disable all acceleration");
 int nouveau_noaccel = 0;
 module_param_named(noaccel, nouveau_noaccel, int, 0400);
 
-MODULE_PARM_DESC(noagp, "Disable fbcon acceleration");
+MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration");
 int nouveau_nofbaccel = 0;
 module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
 
@@ -135,7 +135,7 @@
 	drm_put_dev(dev);
 }
 
-static int
+int
 nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -233,7 +233,7 @@
 	return ret;
 }
 
-static int
+int
 nouveau_pci_resume(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
@@ -402,8 +402,10 @@
 			nouveau_modeset = 1;
 	}
 
-	if (nouveau_modeset == 1)
+	if (nouveau_modeset == 1) {
 		driver.driver_features |= DRIVER_MODESET;
+		nouveau_register_dsm_handler();
+	}
 
 	return drm_init(&driver);
 }
@@ -411,6 +413,7 @@
 static void __exit nouveau_exit(void)
 {
 	drm_exit(&driver);
+	nouveau_unregister_dsm_handler();
 }
 
 module_init(nouveau_init);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 1c15ef3..5f8d987 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -34,7 +34,7 @@
 
 #define DRIVER_MAJOR		0
 #define DRIVER_MINOR		0
-#define DRIVER_PATCHLEVEL	15
+#define DRIVER_PATCHLEVEL	16
 
 #define NOUVEAU_FAMILY   0x0000FFFF
 #define NOUVEAU_FLAGS    0xFFFF0000
@@ -83,6 +83,7 @@
 	struct drm_file *reserved_by;
 	struct list_head entry;
 	int pbbo_index;
+	bool validate_mapped;
 
 	struct nouveau_channel *channel;
 
@@ -239,6 +240,11 @@
 		int cur;
 		int put;
 		/* access via pushbuf_bo */
+
+		int ib_base;
+		int ib_max;
+		int ib_free;
+		int ib_put;
 	} dma;
 
 	uint32_t sw_subchannel[8];
@@ -533,6 +539,9 @@
 	struct nouveau_engine engine;
 	struct nouveau_channel *channel;
 
+	/* For PFIFO and PGRAPH. */
+	spinlock_t context_switch_lock;
+
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
 	struct nouveau_gpuobj *ramht;
 	uint32_t ramin_rsvd_vram;
@@ -596,8 +605,7 @@
 
 	struct list_head gpuobj_list;
 
-	struct nvbios VBIOS;
-	struct nouveau_bios_info *vbios;
+	struct nvbios vbios;
 
 	struct nv04_mode_state mode_reg;
 	struct nv04_mode_state saved_reg;
@@ -614,7 +622,6 @@
 	} susres;
 
 	struct backlight_device *backlight;
-	bool acpi_dsm;
 
 	struct nouveau_channel *evo;
 
@@ -682,6 +689,9 @@
 extern int nouveau_nofbaccel;
 extern int nouveau_noaccel;
 
+extern int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state);
+extern int nouveau_pci_resume(struct pci_dev *pdev);
+
 /* nouveau_state.c */
 extern void nouveau_preclose(struct drm_device *dev, struct drm_file *);
 extern int  nouveau_load(struct drm_device *, unsigned long flags);
@@ -696,12 +706,6 @@
 			       uint32_t reg, uint32_t mask, uint32_t val);
 extern bool nouveau_wait_for_idle(struct drm_device *);
 extern int  nouveau_card_init(struct drm_device *);
-extern int  nouveau_ioctl_card_init(struct drm_device *, void *data,
-				    struct drm_file *);
-extern int  nouveau_ioctl_suspend(struct drm_device *, void *data,
-				  struct drm_file *);
-extern int  nouveau_ioctl_resume(struct drm_device *, void *data,
-				 struct drm_file *);
 
 /* nouveau_mem.c */
 extern int  nouveau_mem_init_heap(struct mem_block **, uint64_t start,
@@ -845,21 +849,15 @@
 /* nouveau_dma.c */
 extern void nouveau_dma_pre_init(struct nouveau_channel *);
 extern int  nouveau_dma_init(struct nouveau_channel *);
-extern int  nouveau_dma_wait(struct nouveau_channel *, int size);
+extern int  nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
 
 /* nouveau_acpi.c */
-#ifdef CONFIG_ACPI
-extern int nouveau_hybrid_setup(struct drm_device *dev);
-extern bool nouveau_dsm_probe(struct drm_device *dev);
+#if defined(CONFIG_ACPI)
+void nouveau_register_dsm_handler(void);
+void nouveau_unregister_dsm_handler(void);
 #else
-static inline int nouveau_hybrid_setup(struct drm_device *dev)
-{
-	return 0;
-}
-static inline bool nouveau_dsm_probe(struct drm_device *dev)
-{
-	return false;
-}
+static inline void nouveau_register_dsm_handler(void) {}
+static inline void nouveau_unregister_dsm_handler(void) {}
 #endif
 
 /* nouveau_backlight.c */
@@ -1027,6 +1025,7 @@
 extern int  nv50_graph_load_context(struct nouveau_channel *);
 extern int  nv50_graph_unload_context(struct drm_device *);
 extern void nv50_graph_context_switch(struct drm_device *);
+extern int  nv50_grctx_init(struct nouveau_grctx *);
 
 /* nouveau_grctx.c */
 extern int  nouveau_grctx_prog_load(struct drm_device *);
@@ -1152,16 +1151,6 @@
 				 struct drm_file *);
 extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *,
 				     struct drm_file *);
-extern int nouveau_gem_ioctl_pushbuf_call(struct drm_device *, void *,
-					  struct drm_file *);
-extern int nouveau_gem_ioctl_pushbuf_call2(struct drm_device *, void *,
-					   struct drm_file *);
-extern int nouveau_gem_ioctl_pin(struct drm_device *, void *,
-				 struct drm_file *);
-extern int nouveau_gem_ioctl_unpin(struct drm_device *, void *,
-				   struct drm_file *);
-extern int nouveau_gem_ioctl_tile(struct drm_device *, void *,
-				  struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *,
 				      struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index ea879a2..68cedd9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -36,6 +36,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/screen_info.h>
+#include <linux/vga_switcheroo.h>
 
 #include "drmP.h"
 #include "drm.h"
@@ -370,6 +371,7 @@
 						nvbo->bo.offset, nvbo);
 
 	mutex_unlock(&dev->struct_mutex);
+	vga_switcheroo_client_fb_set(dev->pdev, info);
 	return 0;
 
 out_unref:
@@ -401,10 +403,8 @@
 
 		unregister_framebuffer(info);
 		nouveau_bo_unmap(nouveau_fb->nvbo);
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(nouveau_fb->nvbo->gem);
+		drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem);
 		nouveau_fb->nvbo = NULL;
-		mutex_unlock(&dev->struct_mutex);
 		if (par)
 			drm_fb_helper_free(&par->helper);
 		framebuffer_release(info);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 70cc308..0d22f66 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -167,12 +167,10 @@
 
 	ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle);
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_handle_unreference(nvbo->gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(nvbo->gem);
 
 	if (ret)
-		drm_gem_object_unreference(nvbo->gem);
+		drm_gem_object_unreference_unlocked(nvbo->gem);
 	return ret;
 }
 
@@ -243,6 +241,11 @@
 			nouveau_fence_unref((void *)&prev_fence);
 		}
 
+		if (unlikely(nvbo->validate_mapped)) {
+			ttm_bo_kunmap(&nvbo->kmap);
+			nvbo->validate_mapped = false;
+		}
+
 		list_del(&nvbo->entry);
 		nvbo->reserved_by = NULL;
 		ttm_bo_unreserve(&nvbo->bo);
@@ -302,11 +305,14 @@
 			if (ret == -EAGAIN)
 				ret = ttm_bo_wait_unreserved(&nvbo->bo, false);
 			drm_gem_object_unreference(gem);
-			if (ret)
+			if (ret) {
+				NV_ERROR(dev, "fail reserve\n");
 				return ret;
+			}
 			goto retry;
 		}
 
+		b->user_priv = (uint64_t)(unsigned long)nvbo;
 		nvbo->reserved_by = file_priv;
 		nvbo->pbbo_index = i;
 		if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
@@ -336,8 +342,10 @@
 			}
 
 			ret = ttm_bo_wait_cpu(&nvbo->bo, false);
-			if (ret)
+			if (ret) {
+				NV_ERROR(dev, "fail wait_cpu\n");
 				return ret;
+			}
 			goto retry;
 		}
 	}
@@ -351,6 +359,7 @@
 {
 	struct drm_nouveau_gem_pushbuf_bo __user *upbbo =
 				(void __force __user *)(uintptr_t)user_pbbo_ptr;
+	struct drm_device *dev = chan->dev;
 	struct nouveau_bo *nvbo;
 	int ret, relocs = 0;
 
@@ -362,39 +371,46 @@
 			spin_lock(&nvbo->bo.lock);
 			ret = ttm_bo_wait(&nvbo->bo, false, false, false);
 			spin_unlock(&nvbo->bo.lock);
-			if (unlikely(ret))
+			if (unlikely(ret)) {
+				NV_ERROR(dev, "fail wait other chan\n");
 				return ret;
+			}
 		}
 
 		ret = nouveau_gem_set_domain(nvbo->gem, b->read_domains,
 					     b->write_domains,
 					     b->valid_domains);
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			NV_ERROR(dev, "fail set_domain\n");
 			return ret;
+		}
 
 		nvbo->channel = chan;
 		ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement,
 				      false, false);
 		nvbo->channel = NULL;
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			NV_ERROR(dev, "fail ttm_validate\n");
 			return ret;
+		}
 
-		if (nvbo->bo.offset == b->presumed_offset &&
+		if (nvbo->bo.offset == b->presumed.offset &&
 		    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
-		      b->presumed_domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
+		      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
 		     (nvbo->bo.mem.mem_type == TTM_PL_TT &&
-		      b->presumed_domain & NOUVEAU_GEM_DOMAIN_GART)))
+		      b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART)))
 			continue;
 
 		if (nvbo->bo.mem.mem_type == TTM_PL_TT)
-			b->presumed_domain = NOUVEAU_GEM_DOMAIN_GART;
+			b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART;
 		else
-			b->presumed_domain = NOUVEAU_GEM_DOMAIN_VRAM;
-		b->presumed_offset = nvbo->bo.offset;
-		b->presumed_ok = 0;
+			b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM;
+		b->presumed.offset = nvbo->bo.offset;
+		b->presumed.valid = 0;
 		relocs++;
 
-		if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index], b, sizeof(*b)))
+		if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed,
+				     &b->presumed, sizeof(b->presumed)))
 			return -EFAULT;
 	}
 
@@ -408,6 +424,7 @@
 			     uint64_t user_buffers, int nr_buffers,
 			     struct validate_op *op, int *apply_relocs)
 {
+	struct drm_device *dev = chan->dev;
 	int ret, relocs = 0;
 
 	INIT_LIST_HEAD(&op->vram_list);
@@ -418,11 +435,14 @@
 		return 0;
 
 	ret = validate_init(chan, file_priv, pbbo, nr_buffers, op);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		NV_ERROR(dev, "validate_init\n");
 		return ret;
+	}
 
 	ret = validate_list(chan, &op->vram_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate vram_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -430,6 +450,7 @@
 
 	ret = validate_list(chan, &op->gart_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate gart_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -437,6 +458,7 @@
 
 	ret = validate_list(chan, &op->both_list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
+		NV_ERROR(dev, "validate both_list\n");
 		validate_fini(op, NULL);
 		return ret;
 	}
@@ -465,59 +487,82 @@
 }
 
 static int
-nouveau_gem_pushbuf_reloc_apply(struct nouveau_channel *chan, int nr_bo,
-				struct drm_nouveau_gem_pushbuf_bo *bo,
-				unsigned nr_relocs, uint64_t ptr_relocs,
-				unsigned nr_dwords, unsigned first_dword,
-				uint32_t *pushbuf, bool is_iomem)
+nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
+				struct drm_nouveau_gem_pushbuf *req,
+				struct drm_nouveau_gem_pushbuf_bo *bo)
 {
 	struct drm_nouveau_gem_pushbuf_reloc *reloc = NULL;
-	struct drm_device *dev = chan->dev;
 	int ret = 0;
 	unsigned i;
 
-	reloc = u_memcpya(ptr_relocs, nr_relocs, sizeof(*reloc));
+	reloc = u_memcpya(req->relocs, req->nr_relocs, sizeof(*reloc));
 	if (IS_ERR(reloc))
 		return PTR_ERR(reloc);
 
-	for (i = 0; i < nr_relocs; i++) {
+	for (i = 0; i < req->nr_relocs; i++) {
 		struct drm_nouveau_gem_pushbuf_reloc *r = &reloc[i];
 		struct drm_nouveau_gem_pushbuf_bo *b;
+		struct nouveau_bo *nvbo;
 		uint32_t data;
 
-		if (r->bo_index >= nr_bo || r->reloc_index < first_dword ||
-		    r->reloc_index >= first_dword + nr_dwords) {
-			NV_ERROR(dev, "Bad relocation %d\n", i);
-			NV_ERROR(dev, "  bo: %d max %d\n", r->bo_index, nr_bo);
-			NV_ERROR(dev, "  id: %d max %d\n", r->reloc_index, nr_dwords);
+		if (unlikely(r->bo_index > req->nr_buffers)) {
+			NV_ERROR(dev, "reloc bo index invalid\n");
 			ret = -EINVAL;
 			break;
 		}
 
 		b = &bo[r->bo_index];
-		if (b->presumed_ok)
+		if (b->presumed.valid)
 			continue;
 
+		if (unlikely(r->reloc_bo_index > req->nr_buffers)) {
+			NV_ERROR(dev, "reloc container bo index invalid\n");
+			ret = -EINVAL;
+			break;
+		}
+		nvbo = (void *)(unsigned long)bo[r->reloc_bo_index].user_priv;
+
+		if (unlikely(r->reloc_bo_offset + 4 >
+			     nvbo->bo.mem.num_pages << PAGE_SHIFT)) {
+			NV_ERROR(dev, "reloc outside of bo\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (!nvbo->kmap.virtual) {
+			ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+					  &nvbo->kmap);
+			if (ret) {
+				NV_ERROR(dev, "failed kmap for reloc\n");
+				break;
+			}
+			nvbo->validate_mapped = true;
+		}
+
 		if (r->flags & NOUVEAU_GEM_RELOC_LOW)
-			data = b->presumed_offset + r->data;
+			data = b->presumed.offset + r->data;
 		else
 		if (r->flags & NOUVEAU_GEM_RELOC_HIGH)
-			data = (b->presumed_offset + r->data) >> 32;
+			data = (b->presumed.offset + r->data) >> 32;
 		else
 			data = r->data;
 
 		if (r->flags & NOUVEAU_GEM_RELOC_OR) {
-			if (b->presumed_domain == NOUVEAU_GEM_DOMAIN_GART)
+			if (b->presumed.domain == NOUVEAU_GEM_DOMAIN_GART)
 				data |= r->tor;
 			else
 				data |= r->vor;
 		}
 
-		if (is_iomem)
-			iowrite32_native(data, (void __force __iomem *)
-						&pushbuf[r->reloc_index]);
-		else
-			pushbuf[r->reloc_index] = data;
+		spin_lock(&nvbo->bo.lock);
+		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+		spin_unlock(&nvbo->bo.lock);
+		if (ret) {
+			NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
+			break;
+		}
+
+		nouveau_bo_wr32(nvbo, r->reloc_bo_offset >> 2, data);
 	}
 
 	kfree(reloc);
@@ -528,37 +573,48 @@
 nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct drm_nouveau_gem_pushbuf *req = data;
-	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
+	struct drm_nouveau_gem_pushbuf_push *push;
+	struct drm_nouveau_gem_pushbuf_bo *bo;
 	struct nouveau_channel *chan;
 	struct validate_op op;
-	struct nouveau_fence* fence = 0;
-	uint32_t *pushbuf = NULL;
-	int ret = 0, do_reloc = 0, i;
+	struct nouveau_fence *fence = 0;
+	int i, j, ret = 0, do_reloc = 0;
 
 	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
 	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan);
 
-	if (req->nr_dwords >= chan->dma.max ||
-	    req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS ||
-	    req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) {
-		NV_ERROR(dev, "Pushbuf config exceeds limits:\n");
-		NV_ERROR(dev, "  dwords : %d max %d\n", req->nr_dwords,
-			 chan->dma.max - 1);
-		NV_ERROR(dev, "  buffers: %d max %d\n", req->nr_buffers,
-			 NOUVEAU_GEM_MAX_BUFFERS);
-		NV_ERROR(dev, "  relocs : %d max %d\n", req->nr_relocs,
-			 NOUVEAU_GEM_MAX_RELOCS);
+	req->vram_available = dev_priv->fb_aper_free;
+	req->gart_available = dev_priv->gart_info.aper_free;
+	if (unlikely(req->nr_push == 0))
+		goto out_next;
+
+	if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) {
+		NV_ERROR(dev, "pushbuf push count exceeds limit: %d max %d\n",
+			 req->nr_push, NOUVEAU_GEM_MAX_PUSH);
 		return -EINVAL;
 	}
 
-	pushbuf = u_memcpya(req->dwords, req->nr_dwords, sizeof(uint32_t));
-	if (IS_ERR(pushbuf))
-		return PTR_ERR(pushbuf);
+	if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) {
+		NV_ERROR(dev, "pushbuf bo count exceeds limit: %d max %d\n",
+			 req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS);
+		return -EINVAL;
+	}
+
+	if (unlikely(req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS)) {
+		NV_ERROR(dev, "pushbuf reloc count exceeds limit: %d max %d\n",
+			 req->nr_relocs, NOUVEAU_GEM_MAX_RELOCS);
+		return -EINVAL;
+	}
+
+	push = u_memcpya(req->push, req->nr_push, sizeof(*push));
+	if (IS_ERR(push))
+		return PTR_ERR(push);
 
 	bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo));
 	if (IS_ERR(bo)) {
-		kfree(pushbuf);
+		kfree(push);
 		return PTR_ERR(bo);
 	}
 
@@ -567,215 +623,89 @@
 	/* Validate buffer list */
 	ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers,
 					   req->nr_buffers, &op, &do_reloc);
-	if (ret)
-		goto out;
-
-	/* Apply any relocations that are required */
-	if (do_reloc) {
-		ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers,
-						      bo, req->nr_relocs,
-						      req->relocs,
-						      req->nr_dwords, 0,
-						      pushbuf, false);
-		if (ret)
-			goto out;
-	}
-
-	/* Emit push buffer to the hw
-	 */
-	ret = RING_SPACE(chan, req->nr_dwords);
-	if (ret)
-		goto out;
-
-	OUT_RINGp(chan, pushbuf, req->nr_dwords);
-
-	ret = nouveau_fence_new(chan, &fence, true);
-	if (ret) {
-		NV_ERROR(dev, "error fencing pushbuf: %d\n", ret);
-		WIND_RING(chan);
-		goto out;
-	}
-
-	if (nouveau_gem_pushbuf_sync(chan)) {
-		ret = nouveau_fence_wait(fence, NULL, false, false);
-		if (ret) {
-			for (i = 0; i < req->nr_dwords; i++)
-				NV_ERROR(dev, "0x%08x\n", pushbuf[i]);
-			NV_ERROR(dev, "^^ above push buffer is fail :(\n");
-		}
-	}
-
-out:
-	validate_fini(&op, fence);
-	nouveau_fence_unref((void**)&fence);
-	mutex_unlock(&dev->struct_mutex);
-	kfree(pushbuf);
-	kfree(bo);
-	return ret;
-}
-
-#define PUSHBUF_CAL (dev_priv->card_type >= NV_20)
-
-int
-nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct drm_nouveau_gem_pushbuf_call *req = data;
-	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
-	struct nouveau_channel *chan;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *pbbo;
-	struct validate_op op;
-	struct nouveau_fence* fence = 0;
-	int i, ret = 0, do_reloc = 0;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan);
-
-	if (unlikely(req->handle == 0))
-		goto out_next;
-
-	if (req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS ||
-	    req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) {
-		NV_ERROR(dev, "Pushbuf config exceeds limits:\n");
-		NV_ERROR(dev, "  buffers: %d max %d\n", req->nr_buffers,
-			 NOUVEAU_GEM_MAX_BUFFERS);
-		NV_ERROR(dev, "  relocs : %d max %d\n", req->nr_relocs,
-			 NOUVEAU_GEM_MAX_RELOCS);
-		return -EINVAL;
-	}
-
-	bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo));
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	mutex_lock(&dev->struct_mutex);
-
-	/* Validate buffer list */
-	ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers,
-					   req->nr_buffers, &op, &do_reloc);
 	if (ret) {
 		NV_ERROR(dev, "validate: %d\n", ret);
 		goto out;
 	}
 
-	/* Validate DMA push buffer */
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem) {
-		NV_ERROR(dev, "Unknown pb handle 0x%08x\n", req->handle);
-		ret = -EINVAL;
-		goto out;
-	}
-	pbbo = nouveau_gem_object(gem);
-
-	if ((req->offset & 3) || req->nr_dwords < 2 ||
-	    (unsigned long)req->offset > (unsigned long)pbbo->bo.mem.size ||
-	    (unsigned long)req->nr_dwords >
-	     ((unsigned long)(pbbo->bo.mem.size - req->offset ) >> 2)) {
-		NV_ERROR(dev, "pb call misaligned or out of bounds: "
-			      "%d + %d * 4 > %ld\n",
-			 req->offset, req->nr_dwords, pbbo->bo.mem.size);
-		ret = -EINVAL;
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	ret = ttm_bo_reserve(&pbbo->bo, false, false, true,
-			     chan->fence.sequence);
-	if (ret) {
-		NV_ERROR(dev, "resv pb: %d\n", ret);
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	nouveau_bo_placement_set(pbbo, 1 << chan->pushbuf_bo->bo.mem.mem_type);
-	ret = ttm_bo_validate(&pbbo->bo, &pbbo->placement, false, false);
-	if (ret) {
-		NV_ERROR(dev, "validate pb: %d\n", ret);
-		ttm_bo_unreserve(&pbbo->bo);
-		drm_gem_object_unreference(gem);
-		goto out;
-	}
-
-	list_add_tail(&pbbo->entry, &op.both_list);
-
-	/* If presumed return address doesn't match, we need to map the
-	 * push buffer and fix it..
-	 */
-	if (!PUSHBUF_CAL) {
-		uint32_t retaddy;
-
-		if (chan->dma.free < 4 + NOUVEAU_DMA_SKIPS) {
-			ret = nouveau_dma_wait(chan, 4 + NOUVEAU_DMA_SKIPS);
-			if (ret) {
-				NV_ERROR(dev, "jmp_space: %d\n", ret);
-				goto out;
-			}
-		}
-
-		retaddy  = chan->pushbuf_base + ((chan->dma.cur + 2) << 2);
-		retaddy |= 0x20000000;
-		if (retaddy != req->suffix0) {
-			req->suffix0 = retaddy;
-			do_reloc = 1;
-		}
-	}
-
 	/* Apply any relocations that are required */
 	if (do_reloc) {
-		void *pbvirt;
-		bool is_iomem;
-		ret = ttm_bo_kmap(&pbbo->bo, 0, pbbo->bo.mem.num_pages,
-				  &pbbo->kmap);
-		if (ret) {
-			NV_ERROR(dev, "kmap pb: %d\n", ret);
-			goto out;
-		}
-
-		pbvirt = ttm_kmap_obj_virtual(&pbbo->kmap, &is_iomem);
-		ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers, bo,
-						      req->nr_relocs,
-						      req->relocs,
-						      req->nr_dwords,
-						      req->offset / 4,
-						      pbvirt, is_iomem);
-
-		if (!PUSHBUF_CAL) {
-			nouveau_bo_wr32(pbbo,
-					req->offset / 4 + req->nr_dwords - 2,
-					req->suffix0);
-		}
-
-		ttm_bo_kunmap(&pbbo->kmap);
+		ret = nouveau_gem_pushbuf_reloc_apply(dev, req, bo);
 		if (ret) {
 			NV_ERROR(dev, "reloc apply: %d\n", ret);
 			goto out;
 		}
 	}
 
-	if (PUSHBUF_CAL) {
-		ret = RING_SPACE(chan, 2);
+	if (chan->dma.ib_max) {
+		ret = nouveau_dma_wait(chan, req->nr_push + 1, 6);
+		if (ret) {
+			NV_INFO(dev, "nv50cal_space: %d\n", ret);
+			goto out;
+		}
+
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+
+			nv50_dma_push(chan, nvbo, push[i].offset,
+				      push[i].length);
+		}
+	} else
+	if (dev_priv->card_type >= NV_20) {
+		ret = RING_SPACE(chan, req->nr_push * 2);
 		if (ret) {
 			NV_ERROR(dev, "cal_space: %d\n", ret);
 			goto out;
 		}
-		OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) +
-				  req->offset) | 2);
-		OUT_RING(chan, 0);
+
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+			struct drm_mm_node *mem = nvbo->bo.mem.mm_node;
+
+			OUT_RING(chan, ((mem->start << PAGE_SHIFT) +
+					push[i].offset) | 2);
+			OUT_RING(chan, 0);
+		}
 	} else {
-		ret = RING_SPACE(chan, 2 + NOUVEAU_DMA_SKIPS);
+		ret = RING_SPACE(chan, req->nr_push * (2 + NOUVEAU_DMA_SKIPS));
 		if (ret) {
 			NV_ERROR(dev, "jmp_space: %d\n", ret);
 			goto out;
 		}
-		OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) +
-				  req->offset) | 0x20000000);
-		OUT_RING(chan, 0);
 
-		/* Space the jumps apart with NOPs. */
-		for (i = 0; i < NOUVEAU_DMA_SKIPS; i++)
+		for (i = 0; i < req->nr_push; i++) {
+			struct nouveau_bo *nvbo = (void *)(unsigned long)
+				bo[push[i].bo_index].user_priv;
+			struct drm_mm_node *mem = nvbo->bo.mem.mm_node;
+			uint32_t cmd;
+
+			cmd = chan->pushbuf_base + ((chan->dma.cur + 2) << 2);
+			cmd |= 0x20000000;
+			if (unlikely(cmd != req->suffix0)) {
+				if (!nvbo->kmap.virtual) {
+					ret = ttm_bo_kmap(&nvbo->bo, 0,
+							  nvbo->bo.mem.
+							  num_pages,
+							  &nvbo->kmap);
+					if (ret) {
+						WIND_RING(chan);
+						goto out;
+					}
+					nvbo->validate_mapped = true;
+				}
+
+				nouveau_bo_wr32(nvbo, (push[i].offset +
+						push[i].length - 8) / 4, cmd);
+			}
+
+			OUT_RING(chan, ((mem->start << PAGE_SHIFT) +
+					push[i].offset) | 0x20000000);
 			OUT_RING(chan, 0);
+			for (j = 0; j < NOUVEAU_DMA_SKIPS; j++)
+				OUT_RING(chan, 0);
+		}
 	}
 
 	ret = nouveau_fence_new(chan, &fence, true);
@@ -790,9 +720,14 @@
 	nouveau_fence_unref((void**)&fence);
 	mutex_unlock(&dev->struct_mutex);
 	kfree(bo);
+	kfree(push);
 
 out_next:
-	if (PUSHBUF_CAL) {
+	if (chan->dma.ib_max) {
+		req->suffix0 = 0x00000000;
+		req->suffix1 = 0x00000000;
+	} else
+	if (dev_priv->card_type >= NV_20) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
 	} else {
@@ -804,19 +739,6 @@
 	return ret;
 }
 
-int
-nouveau_gem_ioctl_pushbuf_call2(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct drm_nouveau_gem_pushbuf_call *req = data;
-
-	req->vram_available = dev_priv->fb_aper_free;
-	req->gart_available = dev_priv->gart_info.aper_free;
-
-	return nouveau_gem_ioctl_pushbuf_call(dev, data, file_priv);
-}
-
 static inline uint32_t
 domain_to_ttm(struct nouveau_bo *nvbo, uint32_t domain)
 {
@@ -831,74 +753,6 @@
 }
 
 int
-nouveau_gem_ioctl_pin(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_nouveau_gem_pin *req = data;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *nvbo;
-	int ret = 0;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		NV_ERROR(dev, "pin only allowed without kernel modesetting\n");
-		return -EINVAL;
-	}
-
-	if (!DRM_SUSER(DRM_CURPROC))
-		return -EPERM;
-
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem)
-		return -EINVAL;
-	nvbo = nouveau_gem_object(gem);
-
-	ret = nouveau_bo_pin(nvbo, domain_to_ttm(nvbo, req->domain));
-	if (ret)
-		goto out;
-
-	req->offset = nvbo->bo.offset;
-	if (nvbo->bo.mem.mem_type == TTM_PL_TT)
-		req->domain = NOUVEAU_GEM_DOMAIN_GART;
-	else
-		req->domain = NOUVEAU_GEM_DOMAIN_VRAM;
-
-out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-
-int
-nouveau_gem_ioctl_unpin(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_nouveau_gem_pin *req = data;
-	struct drm_gem_object *gem;
-	int ret;
-
-	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		return -EINVAL;
-
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem)
-		return -EINVAL;
-
-	ret = nouveau_bo_unpin(nouveau_gem_object(gem));
-
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-
-int
 nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
@@ -935,9 +789,7 @@
 	}
 
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
 
@@ -965,9 +817,7 @@
 	ret = 0;
 
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
 
@@ -986,9 +836,7 @@
 		return -EINVAL;
 
 	ret = nouveau_gem_info(gem, req);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_hw.c b/drivers/gpu/drm/nouveau/nouveau_hw.c
index dc46792..7855b35 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hw.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hw.c
@@ -160,7 +160,7 @@
 setPLL_single(struct drm_device *dev, uint32_t reg, struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	uint32_t oldpll = NVReadRAMDAC(dev, 0, reg);
 	int oldN = (oldpll >> 8) & 0xff, oldM = oldpll & 0xff;
 	uint32_t pll = (oldpll & 0xfff80000) | pv->log2P << 16 | pv->NM1;
@@ -216,7 +216,7 @@
 		       struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int chip_version = dev_priv->vbios->chip_version;
+	int chip_version = dev_priv->vbios.chip_version;
 	bool nv3035 = chip_version == 0x30 || chip_version == 0x35;
 	uint32_t reg2 = reg1 + ((reg1 == NV_RAMDAC_VPLL2) ? 0x5c : 0x70);
 	uint32_t oldpll1 = NVReadRAMDAC(dev, 0, reg1);
@@ -374,7 +374,7 @@
 		  struct nouveau_pll_vals *pv)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int cv = dev_priv->vbios->chip_version;
+	int cv = dev_priv->vbios.chip_version;
 
 	if (cv == 0x30 || cv == 0x31 || cv == 0x35 || cv == 0x36 ||
 	    cv >= 0x40) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
index 70e994d..88583e7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -254,16 +254,16 @@
 nouveau_i2c_find(struct drm_device *dev, int index)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 
-	if (index > DCB_MAX_NUM_I2C_ENTRIES)
+	if (index >= DCB_MAX_NUM_I2C_ENTRIES)
 		return NULL;
 
-	if (!bios->bdcb.dcb.i2c[index].chan) {
-		if (nouveau_i2c_init(dev, &bios->bdcb.dcb.i2c[index], index))
+	if (!bios->dcb.i2c[index].chan) {
+		if (nouveau_i2c_init(dev, &bios->dcb.i2c[index], index))
 			return NULL;
 	}
 
-	return bios->bdcb.dcb.i2c[index].chan;
+	return bios->dcb.i2c[index].chan;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 447f9f6..95220dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -691,11 +691,14 @@
 	struct drm_device *dev = (struct drm_device *)arg;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t status, fbdev_flags = 0;
+	unsigned long flags;
 
 	status = nv_rd32(dev, NV03_PMC_INTR_0);
 	if (!status)
 		return IRQ_NONE;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	if (dev_priv->fbdev_info) {
 		fbdev_flags = dev_priv->fbdev_info->flags;
 		dev_priv->fbdev_info->flags |= FBINFO_HWACCEL_DISABLED;
@@ -733,5 +736,7 @@
 	if (dev_priv->fbdev_info)
 		dev_priv->fbdev_info->flags = fbdev_flags;
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
 	return IRQ_HANDLED;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index d99dc08..9537f3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -61,11 +61,8 @@
 
 	chan->notifier_bo = ntfy;
 out_err:
-	if (ret) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(ntfy->gem);
-		mutex_unlock(&dev->struct_mutex);
-	}
+	if (ret)
+		drm_gem_object_unreference_unlocked(ntfy->gem);
 
 	return ret;
 }
@@ -81,8 +78,8 @@
 	nouveau_bo_unmap(chan->notifier_bo);
 	mutex_lock(&dev->struct_mutex);
 	nouveau_bo_unpin(chan->notifier_bo);
-	drm_gem_object_unreference(chan->notifier_bo->gem);
 	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(chan->notifier_bo->gem);
 	nouveau_mem_takedown(&chan->notifier_heap);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index a4851af..eb8f084 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -29,6 +29,7 @@
 #include "drm_sarea.h"
 #include "drm_crtc_helper.h"
 #include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
@@ -371,6 +372,30 @@
 	return ret;
 }
 
+static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
+					 enum vga_switcheroo_state state)
+{
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_ERR "VGA switcheroo: switched nouveau on\n");
+		nouveau_pci_resume(pdev);
+	} else {
+		printk(KERN_ERR "VGA switcheroo: switched nouveau off\n");
+		nouveau_pci_suspend(pdev, pmm);
+	}
+}
+
+static bool nouveau_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
 int
 nouveau_card_init(struct drm_device *dev)
 {
@@ -384,6 +409,8 @@
 		return 0;
 
 	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
+	vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state,
+				       nouveau_switcheroo_can_switch);
 
 	/* Initialise internal driver API hooks */
 	ret = nouveau_init_engine_ptrs(dev);
@@ -391,6 +418,7 @@
 		goto out;
 	engine = &dev_priv->engine;
 	dev_priv->init_state = NOUVEAU_CARD_INIT_FAILED;
+	spin_lock_init(&dev_priv->context_switch_lock);
 
 	/* Parse BIOS tables / Run init tables if card not POSTed */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
@@ -617,11 +645,6 @@
 	NV_DEBUG(dev, "vendor: 0x%X device: 0x%X class: 0x%X\n",
 		 dev->pci_vendor, dev->pci_device, dev->pdev->class);
 
-	dev_priv->acpi_dsm = nouveau_dsm_probe(dev);
-
-	if (dev_priv->acpi_dsm)
-		nouveau_hybrid_setup(dev);
-
 	dev_priv->wq = create_workqueue("nouveau");
 	if (!dev_priv->wq)
 		return -EINVAL;
@@ -776,13 +799,6 @@
 	return 0;
 }
 
-int
-nouveau_ioctl_card_init(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	return nouveau_card_init(dev);
-}
-
 int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 						struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index d2f143e..a1d1ebb 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -926,9 +926,7 @@
 	nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.offset);
 	nv_crtc->cursor.show(nv_crtc, true);
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index 1d73b15..1cb19e3 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -230,13 +230,13 @@
 	if (dcb->type == OUTPUT_TV) {
 		testval = RGB_TEST_DATA(0xa0, 0xa0, 0xa0);
 
-		if (dev_priv->vbios->tvdactestval)
-			testval = dev_priv->vbios->tvdactestval;
+		if (dev_priv->vbios.tvdactestval)
+			testval = dev_priv->vbios.tvdactestval;
 	} else {
 		testval = RGB_TEST_DATA(0x140, 0x140, 0x140); /* 0x94050140 */
 
-		if (dev_priv->vbios->dactestval)
-			testval = dev_priv->vbios->dactestval;
+		if (dev_priv->vbios.dactestval)
+			testval = dev_priv->vbios.dactestval;
 	}
 
 	saved_rtest_ctrl = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index 483f875..41634d4 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -269,10 +269,10 @@
 	regp->fp_horiz_regs[FP_TOTAL] = output_mode->htotal - 1;
 	if (!nv_gf4_disp_arch(dev) ||
 	    (output_mode->hsync_start - output_mode->hdisplay) >=
-					dev_priv->vbios->digital_min_front_porch)
+					dev_priv->vbios.digital_min_front_porch)
 		regp->fp_horiz_regs[FP_CRTC] = output_mode->hdisplay;
 	else
-		regp->fp_horiz_regs[FP_CRTC] = output_mode->hsync_start - dev_priv->vbios->digital_min_front_porch - 1;
+		regp->fp_horiz_regs[FP_CRTC] = output_mode->hsync_start - dev_priv->vbios.digital_min_front_porch - 1;
 	regp->fp_horiz_regs[FP_SYNC_START] = output_mode->hsync_start - 1;
 	regp->fp_horiz_regs[FP_SYNC_END] = output_mode->hsync_end - 1;
 	regp->fp_horiz_regs[FP_VALID_START] = output_mode->hskew;
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index ef77215..c7898b4 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -93,10 +93,9 @@
 nv04_display_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct parsed_dcb *dcb = dev_priv->vbios->dcb;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 	struct drm_encoder *encoder;
 	struct drm_crtc *crtc;
-	uint16_t connector[16] = { 0 };
 	int i, ret;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -154,52 +153,10 @@
 
 		if (ret)
 			continue;
-
-		connector[dcbent->connector] |= (1 << dcbent->type);
 	}
 
-	for (i = 0; i < dcb->entries; i++) {
-		struct dcb_entry *dcbent = &dcb->entry[i];
-		uint16_t encoders;
-		int type;
-
-		encoders = connector[dcbent->connector];
-		if (!(encoders & (1 << dcbent->type)))
-			continue;
-		connector[dcbent->connector] = 0;
-
-		switch (dcbent->type) {
-		case OUTPUT_ANALOG:
-			if (!MULTIPLE_ENCODERS(encoders))
-				type = DRM_MODE_CONNECTOR_VGA;
-			else
-				type = DRM_MODE_CONNECTOR_DVII;
-			break;
-		case OUTPUT_TMDS:
-			if (!MULTIPLE_ENCODERS(encoders))
-				type = DRM_MODE_CONNECTOR_DVID;
-			else
-				type = DRM_MODE_CONNECTOR_DVII;
-			break;
-		case OUTPUT_LVDS:
-			type = DRM_MODE_CONNECTOR_LVDS;
-#if 0
-			/* don't create i2c adapter when lvds ddc not allowed */
-			if (dcbent->lvdsconf.use_straps_for_mode ||
-			    dev_priv->vbios->fp_no_ddc)
-				i2c_index = 0xf;
-#endif
-			break;
-		case OUTPUT_TV:
-			type = DRM_MODE_CONNECTOR_TV;
-			break;
-		default:
-			type = DRM_MODE_CONNECTOR_Unknown;
-			continue;
-		}
-
-		nouveau_connector_create(dev, dcbent->connector, type);
-	}
+	for (i = 0; i < dcb->connector.entries; i++)
+		nouveau_connector_create(dev, &dcb->connector.entry[i]);
 
 	/* Save previous state */
 	NVLockVgaCrtcs(dev, false);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index fd01caa..3da90c2 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -118,7 +118,7 @@
 		return;
 	}
 
-	width = (image->width + 31) & ~31;
+	width = ALIGN(image->width, 32);
 	dsize = (width * image->height) >> 5;
 
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index f31347b..66fe559 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -117,6 +117,7 @@
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 	int ret;
 
 	ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(chan->id), ~0,
@@ -127,6 +128,8 @@
 	if (ret)
 		return ret;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	/* Setup initial state */
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
@@ -144,6 +147,8 @@
 	/* enable the fifo dma operation */
 	nv_wr32(dev, NV04_PFIFO_MODE,
 		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_tv.c b/drivers/gpu/drm/nouveau/nv04_tv.c
index 9c63099..c4e3404 100644
--- a/drivers/gpu/drm/nouveau/nv04_tv.c
+++ b/drivers/gpu/drm/nouveau/nv04_tv.c
@@ -262,7 +262,7 @@
 	nv_encoder->or = ffs(entry->or) - 1;
 
 	/* Run the slave-specific initialization */
-	adap = &dev_priv->vbios->dcb->i2c[i2c_index].chan->adapter;
+	adap = &dev_priv->vbios.dcb.i2c[i2c_index].chan->adapter;
 
 	was_locked = NVLockVgaCrtcs(dev, false);
 
diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 21ac6e4..74c8803 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -45,8 +45,8 @@
 
 #define RGB_TEST_DATA(r, g, b) (r << 0 | g << 10 | b << 20)
 	testval = RGB_TEST_DATA(0x82, 0xeb, 0x82);
-	if (dev_priv->vbios->tvdactestval)
-		testval = dev_priv->vbios->tvdactestval;
+	if (dev_priv->vbios.tvdactestval)
+		testval = dev_priv->vbios.tvdactestval;
 
 	dacclk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset);
 	head = (dacclk & 0x100) >> 8;
@@ -367,7 +367,7 @@
 			     !enc->crtc &&
 			     nv04_dfp_get_bound_head(dev, dcb) == head) {
 				nv04_dfp_bind_head(dev, dcb, head ^ 1,
-						dev_priv->VBIOS.fp.dual_link);
+						dev_priv->vbios.fp.dual_link);
 			}
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nv40_fifo.c b/drivers/gpu/drm/nouveau/nv40_fifo.c
index b4f19cc..6b2ef4a 100644
--- a/drivers/gpu/drm/nouveau/nv40_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv40_fifo.c
@@ -37,6 +37,7 @@
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	uint32_t fc = NV40_RAMFC(chan->id);
+	unsigned long flags;
 	int ret;
 
 	ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0,
@@ -45,6 +46,8 @@
 	if (ret)
 		return ret;
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	nv_wi32(dev, fc +  0, chan->pushbuf_base);
 	nv_wi32(dev, fc +  4, chan->pushbuf_base);
@@ -63,6 +66,8 @@
 	/* enable the fifo dma operation */
 	nv_wr32(dev, NV04_PFIFO_MODE,
 		nv_rd32(dev, NV04_PFIFO_MODE) | (1 << chan->id));
+
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index d1a651e..cfabeb9 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -358,9 +358,7 @@
 	nv_crtc->cursor.show(nv_crtc, true);
 
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_dac.c b/drivers/gpu/drm/nouveau/nv50_dac.c
index f08f042..1fd9537 100644
--- a/drivers/gpu/drm/nouveau/nv50_dac.c
+++ b/drivers/gpu/drm/nouveau/nv50_dac.c
@@ -79,8 +79,8 @@
 	}
 
 	/* Use bios provided value if possible. */
-	if (dev_priv->vbios->dactestval) {
-		load_pattern = dev_priv->vbios->dactestval;
+	if (dev_priv->vbios.dactestval) {
+		load_pattern = dev_priv->vbios.dactestval;
 		NV_DEBUG_KMS(dev, "Using bios provided load_pattern of %d\n",
 			  load_pattern);
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 90f0bf5..61a89f2 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -370,9 +370,7 @@
 		struct nouveau_connector *conn = nouveau_connector(connector);
 		struct dcb_gpio_entry *gpio;
 
-		if (connector->connector_type != DRM_MODE_CONNECTOR_DVII &&
-		    connector->connector_type != DRM_MODE_CONNECTOR_DVID &&
-		    connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
+		if (conn->dcb->gpio_tag == 0xff)
 			continue;
 
 		gpio = nouveau_bios_gpio_entry(dev, conn->dcb->gpio_tag);
@@ -465,8 +463,7 @@
 int nv50_display_create(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct parsed_dcb *dcb = dev_priv->vbios->dcb;
-	uint32_t connector[16] = {};
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
 	int ret, i;
 
 	NV_DEBUG_KMS(dev, "\n");
@@ -522,44 +519,13 @@
 			NV_WARN(dev, "DCB encoder %d unknown\n", entry->type);
 			continue;
 		}
-
-		connector[entry->connector] |= (1 << entry->type);
 	}
 
-	/* It appears that DCB 3.0+ VBIOS has a connector table, however,
-	 * I'm not 100% certain how to decode it correctly yet so just
-	 * look at what encoders are present on each connector index and
-	 * attempt to derive the connector type from that.
-	 */
-	for (i = 0 ; i < dcb->entries; i++) {
-		struct dcb_entry *entry = &dcb->entry[i];
-		uint16_t encoders;
-		int type;
-
-		encoders = connector[entry->connector];
-		if (!(encoders & (1 << entry->type)))
+	for (i = 0 ; i < dcb->connector.entries; i++) {
+		if (i != 0 && dcb->connector.entry[i].index ==
+			      dcb->connector.entry[i - 1].index)
 			continue;
-		connector[entry->connector] = 0;
-
-		if (encoders & (1 << OUTPUT_DP)) {
-			type = DRM_MODE_CONNECTOR_DisplayPort;
-		} else if (encoders & (1 << OUTPUT_TMDS)) {
-			if (encoders & (1 << OUTPUT_ANALOG))
-				type = DRM_MODE_CONNECTOR_DVII;
-			else
-				type = DRM_MODE_CONNECTOR_DVID;
-		} else if (encoders & (1 << OUTPUT_ANALOG)) {
-			type = DRM_MODE_CONNECTOR_VGA;
-		} else if (encoders & (1 << OUTPUT_LVDS)) {
-			type = DRM_MODE_CONNECTOR_LVDS;
-		} else {
-			type = DRM_MODE_CONNECTOR_Unknown;
-		}
-
-		if (type == DRM_MODE_CONNECTOR_Unknown)
-			continue;
-
-		nouveau_connector_create(dev, entry->connector, type);
+		nouveau_connector_create(dev, &dcb->connector.entry[i]);
 	}
 
 	ret = nv50_display_init(dev);
@@ -667,8 +633,8 @@
 		return -1;
 	}
 
-	for (i = 0; i < dev_priv->vbios->dcb->entries; i++) {
-		struct dcb_entry *dcbent = &dev_priv->vbios->dcb->entry[i];
+	for (i = 0; i < dev_priv->vbios.dcb.entries; i++) {
+		struct dcb_entry *dcbent = &dev_priv->vbios.dcb.entry[i];
 
 		if (dcbent->type != type)
 			continue;
@@ -692,7 +658,7 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_connector *nv_connector = NULL;
 	struct drm_encoder *encoder;
-	struct nvbios *bios = &dev_priv->VBIOS;
+	struct nvbios *bios = &dev_priv->vbios;
 	uint32_t mc, script = 0, or;
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
@@ -710,7 +676,7 @@
 	switch (dcbent->type) {
 	case OUTPUT_LVDS:
 		script = (mc >> 8) & 0xf;
-		if (bios->pub.fp_no_ddc) {
+		if (bios->fp_no_ddc) {
 			if (bios->fp.dual_link)
 				script |= 0x0100;
 			if (bios->fp.if_is_24bit)
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 0f57cdf..993c712 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -109,7 +109,7 @@
 		return;
 	}
 
-	width = (image->width + 31) & ~31;
+	width = ALIGN(image->width, 32);
 	dwords = (width * image->height) >> 5;
 
 	BEGIN_RING(chan, NvSub2D, 0x0814, 2);
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index 204a79f..e20c0e2 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -243,6 +243,7 @@
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramfc = NULL;
+	unsigned long flags;
 	int ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
@@ -278,19 +279,21 @@
 			return ret;
 	}
 
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+
 	dev_priv->engine.instmem.prepare_access(dev, true);
 
-	nv_wo32(dev, ramfc, 0x08/4, chan->pushbuf_base);
-	nv_wo32(dev, ramfc, 0x10/4, chan->pushbuf_base);
 	nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4);
 	nv_wo32(dev, ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4));
-	nv_wo32(dev, ramfc, 0x3c/4, 0x00086078);
 	nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff);
 	nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff);
 	nv_wo32(dev, ramfc, 0x40/4, 0x00000000);
 	nv_wo32(dev, ramfc, 0x7c/4, 0x30000001);
 	nv_wo32(dev, ramfc, 0x78/4, 0x00000000);
-	nv_wo32(dev, ramfc, 0x4c/4, 0xffffffff);
+	nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078);
+	nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base +
+				    chan->dma.ib_base * 4);
+	nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16);
 
 	if (!IS_G80) {
 		nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id);
@@ -306,10 +309,12 @@
 	ret = nv50_fifo_channel_enable(dev, chan->id, false);
 	if (ret) {
 		NV_ERROR(dev, "error enabling ch%d: %d\n", chan->id, ret);
+		spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 		return ret;
 	}
 
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index 6d50480..857a096 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -28,30 +28,7 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
-MODULE_FIRMWARE("nouveau/nv50.ctxprog");
-MODULE_FIRMWARE("nouveau/nv50.ctxvals");
-MODULE_FIRMWARE("nouveau/nv84.ctxprog");
-MODULE_FIRMWARE("nouveau/nv84.ctxvals");
-MODULE_FIRMWARE("nouveau/nv86.ctxprog");
-MODULE_FIRMWARE("nouveau/nv86.ctxvals");
-MODULE_FIRMWARE("nouveau/nv92.ctxprog");
-MODULE_FIRMWARE("nouveau/nv92.ctxvals");
-MODULE_FIRMWARE("nouveau/nv94.ctxprog");
-MODULE_FIRMWARE("nouveau/nv94.ctxvals");
-MODULE_FIRMWARE("nouveau/nv96.ctxprog");
-MODULE_FIRMWARE("nouveau/nv96.ctxvals");
-MODULE_FIRMWARE("nouveau/nv98.ctxprog");
-MODULE_FIRMWARE("nouveau/nv98.ctxvals");
-MODULE_FIRMWARE("nouveau/nva0.ctxprog");
-MODULE_FIRMWARE("nouveau/nva0.ctxvals");
-MODULE_FIRMWARE("nouveau/nva5.ctxprog");
-MODULE_FIRMWARE("nouveau/nva5.ctxvals");
-MODULE_FIRMWARE("nouveau/nva8.ctxprog");
-MODULE_FIRMWARE("nouveau/nva8.ctxvals");
-MODULE_FIRMWARE("nouveau/nvaa.ctxprog");
-MODULE_FIRMWARE("nouveau/nvaa.ctxvals");
-MODULE_FIRMWARE("nouveau/nvac.ctxprog");
-MODULE_FIRMWARE("nouveau/nvac.ctxvals");
+#include "nouveau_grctx.h"
 
 #define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50)
 
@@ -111,9 +88,34 @@
 
 	NV_DEBUG(dev, "\n");
 
-	nouveau_grctx_prog_load(dev);
-	if (!dev_priv->engine.graph.ctxprog)
-		dev_priv->engine.graph.accel_blocked = true;
+	if (nouveau_ctxfw) {
+		nouveau_grctx_prog_load(dev);
+		dev_priv->engine.graph.grctx_size = 0x70000;
+	}
+	if (!dev_priv->engine.graph.ctxprog) {
+		struct nouveau_grctx ctx = {};
+		uint32_t *cp = kmalloc(512 * 4, GFP_KERNEL);
+		int i;
+		if (!cp) {
+			NV_ERROR(dev, "Couldn't alloc ctxprog! Disabling acceleration.\n");
+			dev_priv->engine.graph.accel_blocked = true;
+			return 0;
+		}
+		ctx.dev = dev;
+		ctx.mode = NOUVEAU_GRCTX_PROG;
+		ctx.data = cp;
+		ctx.ctxprog_max = 512;
+		if (!nv50_grctx_init(&ctx)) {
+			dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
+
+			nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+			for (i = 0; i < ctx.ctxprog_len; i++)
+				nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
+		} else {
+			dev_priv->engine.graph.accel_blocked = true;
+		}
+		kfree(cp);
+	}
 
 	nv_wr32(dev, 0x400320, 4);
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
@@ -193,13 +195,13 @@
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramin = chan->ramin->gpuobj;
 	struct nouveau_gpuobj *ctx;
-	uint32_t grctx_size = 0x70000;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	int hdr, ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, grctx_size, 0x1000,
-				     NVOBJ_FLAG_ZERO_ALLOC |
+	ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size,
+				     0x1000, NVOBJ_FLAG_ZERO_ALLOC |
 				     NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
 	if (ret)
 		return ret;
@@ -209,7 +211,7 @@
 	dev_priv->engine.instmem.prepare_access(dev, true);
 	nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002);
 	nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
-					   grctx_size - 1);
+					   pgraph->grctx_size - 1);
 	nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance);
 	nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0);
 	nv_wo32(dev, ramin, (hdr + 0x10)/4, 0);
@@ -217,7 +219,15 @@
 	dev_priv->engine.instmem.finish_access(dev);
 
 	dev_priv->engine.instmem.prepare_access(dev, true);
-	nouveau_grctx_vals_load(dev, ctx);
+	if (!pgraph->ctxprog) {
+		struct nouveau_grctx ctx = {};
+		ctx.dev = chan->dev;
+		ctx.mode = NOUVEAU_GRCTX_VALS;
+		ctx.data = chan->ramin_grctx->gpuobj;
+		nv50_grctx_init(&ctx);
+	} else {
+		nouveau_grctx_vals_load(dev, ctx);
+	}
 	nv_wo32(dev, ctx, 0x00000/4, chan->ramin->instance >> 12);
 	if ((dev_priv->chipset & 0xf0) == 0xa0)
 		nv_wo32(dev, ctx, 0x00004/4, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
new file mode 100644
index 0000000..d105fcd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -0,0 +1,2367 @@
+/*
+ * Copyright 2009 Marcin Kościelnicki
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define CP_FLAG_CLEAR                 0
+#define CP_FLAG_SET                   1
+#define CP_FLAG_SWAP_DIRECTION        ((0 * 32) + 0)
+#define CP_FLAG_SWAP_DIRECTION_LOAD   0
+#define CP_FLAG_SWAP_DIRECTION_SAVE   1
+#define CP_FLAG_UNK01                 ((0 * 32) + 1)
+#define CP_FLAG_UNK01_CLEAR           0
+#define CP_FLAG_UNK01_SET             1
+#define CP_FLAG_UNK03                 ((0 * 32) + 3)
+#define CP_FLAG_UNK03_CLEAR           0
+#define CP_FLAG_UNK03_SET             1
+#define CP_FLAG_USER_SAVE             ((0 * 32) + 5)
+#define CP_FLAG_USER_SAVE_NOT_PENDING 0
+#define CP_FLAG_USER_SAVE_PENDING     1
+#define CP_FLAG_USER_LOAD             ((0 * 32) + 6)
+#define CP_FLAG_USER_LOAD_NOT_PENDING 0
+#define CP_FLAG_USER_LOAD_PENDING     1
+#define CP_FLAG_UNK0B                 ((0 * 32) + 0xb)
+#define CP_FLAG_UNK0B_CLEAR           0
+#define CP_FLAG_UNK0B_SET             1
+#define CP_FLAG_UNK1D                 ((0 * 32) + 0x1d)
+#define CP_FLAG_UNK1D_CLEAR           0
+#define CP_FLAG_UNK1D_SET             1
+#define CP_FLAG_UNK20                 ((1 * 32) + 0)
+#define CP_FLAG_UNK20_CLEAR           0
+#define CP_FLAG_UNK20_SET             1
+#define CP_FLAG_STATUS                ((2 * 32) + 0)
+#define CP_FLAG_STATUS_BUSY           0
+#define CP_FLAG_STATUS_IDLE           1
+#define CP_FLAG_AUTO_SAVE             ((2 * 32) + 4)
+#define CP_FLAG_AUTO_SAVE_NOT_PENDING 0
+#define CP_FLAG_AUTO_SAVE_PENDING     1
+#define CP_FLAG_AUTO_LOAD             ((2 * 32) + 5)
+#define CP_FLAG_AUTO_LOAD_NOT_PENDING 0
+#define CP_FLAG_AUTO_LOAD_PENDING     1
+#define CP_FLAG_XFER                  ((2 * 32) + 11)
+#define CP_FLAG_XFER_IDLE             0
+#define CP_FLAG_XFER_BUSY             1
+#define CP_FLAG_NEWCTX                ((2 * 32) + 12)
+#define CP_FLAG_NEWCTX_BUSY           0
+#define CP_FLAG_NEWCTX_DONE           1
+#define CP_FLAG_ALWAYS                ((2 * 32) + 13)
+#define CP_FLAG_ALWAYS_FALSE          0
+#define CP_FLAG_ALWAYS_TRUE           1
+
+#define CP_CTX                   0x00100000
+#define CP_CTX_COUNT             0x000f0000
+#define CP_CTX_COUNT_SHIFT               16
+#define CP_CTX_REG               0x00003fff
+#define CP_LOAD_SR               0x00200000
+#define CP_LOAD_SR_VALUE         0x000fffff
+#define CP_BRA                   0x00400000
+#define CP_BRA_IP                0x0001ff00
+#define CP_BRA_IP_SHIFT                   8
+#define CP_BRA_IF_CLEAR          0x00000080
+#define CP_BRA_FLAG              0x0000007f
+#define CP_WAIT                  0x00500000
+#define CP_WAIT_SET              0x00000080
+#define CP_WAIT_FLAG             0x0000007f
+#define CP_SET                   0x00700000
+#define CP_SET_1                 0x00000080
+#define CP_SET_FLAG              0x0000007f
+#define CP_NEWCTX                0x00600004
+#define CP_NEXT_TO_SWAP          0x00600005
+#define CP_SET_CONTEXT_POINTER   0x00600006
+#define CP_SET_XFER_POINTER      0x00600007
+#define CP_ENABLE                0x00600009
+#define CP_END                   0x0060000c
+#define CP_NEXT_TO_CURRENT       0x0060000d
+#define CP_DISABLE1              0x0090ffff
+#define CP_DISABLE2              0x0091ffff
+#define CP_XFER_1      0x008000ff
+#define CP_XFER_2      0x008800ff
+#define CP_SEEK_1      0x00c000ff
+#define CP_SEEK_2      0x00c800ff
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_grctx.h"
+
+/*
+ * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's
+ * the GPU itself that does context-switching, but it needs a special
+ * microcode to do it. And it's the driver's task to supply this microcode,
+ * further known as ctxprog, as well as the initial context values, known
+ * as ctxvals.
+ *
+ * Without ctxprog, you cannot switch contexts. Not even in software, since
+ * the majority of context [xfer strands] isn't accessible directly. You're
+ * stuck with a single channel, and you also suffer all the problems resulting
+ * from missing ctxvals, since you cannot load them.
+ *
+ * Without ctxvals, you're stuck with PGRAPH's default context. It's enough to
+ * run 2d operations, but trying to utilise 3d or CUDA will just lock you up,
+ * since you don't have... some sort of needed setup.
+ *
+ * Nouveau will just disable acceleration if not given ctxprog + ctxvals, since
+ * it's too much hassle to handle no-ctxprog as a special case.
+ */
+
+/*
+ * How ctxprogs work.
+ *
+ * The ctxprog is written in its own kind of microcode, with very small and
+ * crappy set of available commands. You upload it to a small [512 insns]
+ * area of memory on PGRAPH, and it'll be run when PFIFO wants PGRAPH to
+ * switch channel. or when the driver explicitely requests it. Stuff visible
+ * to ctxprog consists of: PGRAPH MMIO registers, PGRAPH context strands,
+ * the per-channel context save area in VRAM [known as ctxvals or grctx],
+ * 4 flags registers, a scratch register, two grctx pointers, plus many
+ * random poorly-understood details.
+ *
+ * When ctxprog runs, it's supposed to check what operations are asked of it,
+ * save old context if requested, optionally reset PGRAPH and switch to the
+ * new channel, and load the new context. Context consists of three major
+ * parts: subset of MMIO registers and two "xfer areas".
+ */
+
+/* TODO:
+ *  - document unimplemented bits compared to nvidia
+ *  - NVAx: make a TP subroutine, use it.
+ *  - use 0x4008fc instead of 0x1540?
+ */
+
+enum cp_label {
+	cp_check_load = 1,
+	cp_setup_auto_load,
+	cp_setup_load,
+	cp_setup_save,
+	cp_swap_state,
+	cp_prepare_exit,
+	cp_exit,
+};
+
+static void nv50_graph_construct_mmio(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer1(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer2(struct nouveau_grctx *ctx);
+
+/* Main function: construct the ctxprog skeleton, call the other functions. */
+
+int
+nv50_grctx_init(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+
+	switch (dev_priv->chipset) {
+	case 0x50:
+	case 0x84:
+	case 0x86:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+	case 0x98:
+	case 0xa0:
+	case 0xa5:
+	case 0xa8:
+	case 0xaa:
+	case 0xac:
+		break;
+	default:
+		NV_ERROR(ctx->dev, "I don't know how to make a ctxprog for "
+				   "your NV%x card.\n", dev_priv->chipset);
+		NV_ERROR(ctx->dev, "Disabling acceleration. Please contact "
+				   "the devs.\n");
+		return -ENOSYS;
+	}
+	/* decide whether we're loading/unloading the context */
+	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
+	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
+
+	cp_name(ctx, cp_check_load);
+	cp_bra (ctx, AUTO_LOAD, PENDING, cp_setup_auto_load);
+	cp_bra (ctx, USER_LOAD, PENDING, cp_setup_load);
+	cp_bra (ctx, ALWAYS, TRUE, cp_exit);
+
+	/* setup for context load */
+	cp_name(ctx, cp_setup_auto_load);
+	cp_out (ctx, CP_DISABLE1);
+	cp_out (ctx, CP_DISABLE2);
+	cp_out (ctx, CP_ENABLE);
+	cp_out (ctx, CP_NEXT_TO_SWAP);
+	cp_set (ctx, UNK01, SET);
+	cp_name(ctx, cp_setup_load);
+	cp_out (ctx, CP_NEWCTX);
+	cp_wait(ctx, NEWCTX, BUSY);
+	cp_set (ctx, UNK1D, CLEAR);
+	cp_set (ctx, SWAP_DIRECTION, LOAD);
+	cp_bra (ctx, UNK0B, SET, cp_prepare_exit);
+	cp_bra (ctx, ALWAYS, TRUE, cp_swap_state);
+
+	/* setup for context save */
+	cp_name(ctx, cp_setup_save);
+	cp_set (ctx, UNK1D, SET);
+	cp_wait(ctx, STATUS, BUSY);
+	cp_set (ctx, UNK01, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE);
+
+	/* general PGRAPH state */
+	cp_name(ctx, cp_swap_state);
+	cp_set (ctx, UNK03, SET);
+	cp_pos (ctx, 0x00004/4);
+	cp_ctx (ctx, 0x400828, 1); /* needed. otherwise, flickering happens. */
+	cp_pos (ctx, 0x00100/4);
+	nv50_graph_construct_mmio(ctx);
+	nv50_graph_construct_xfer1(ctx);
+	nv50_graph_construct_xfer2(ctx);
+
+	cp_bra (ctx, SWAP_DIRECTION, SAVE, cp_check_load);
+
+	cp_set (ctx, UNK20, SET);
+	cp_set (ctx, SWAP_DIRECTION, SAVE); /* no idea why this is needed, but fixes at least one lockup. */
+	cp_lsr (ctx, ctx->ctxvals_base);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, 4);
+	cp_out (ctx, CP_SEEK_1);
+	cp_out (ctx, CP_XFER_1);
+	cp_wait(ctx, XFER, BUSY);
+
+	/* pre-exit state updates */
+	cp_name(ctx, cp_prepare_exit);
+	cp_set (ctx, UNK01, CLEAR);
+	cp_set (ctx, UNK03, CLEAR);
+	cp_set (ctx, UNK1D, CLEAR);
+
+	cp_bra (ctx, USER_SAVE, PENDING, cp_exit);
+	cp_out (ctx, CP_NEXT_TO_CURRENT);
+
+	cp_name(ctx, cp_exit);
+	cp_set (ctx, USER_SAVE, NOT_PENDING);
+	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_out (ctx, CP_END);
+	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
+
+	return 0;
+}
+
+/*
+ * Constructs MMIO part of ctxprog and ctxvals. Just a matter of knowing which
+ * registers to save/restore and the default values for them.
+ */
+
+static void
+nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i, j;
+	int offset, base;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+
+	/* 0800 */
+	cp_ctx(ctx, 0x400808, 7);
+	gr_def(ctx, 0x400814, 0x00000030);
+	cp_ctx(ctx, 0x400834, 0x32);
+	if (dev_priv->chipset == 0x50) {
+		gr_def(ctx, 0x400834, 0xff400040);
+		gr_def(ctx, 0x400838, 0xfff00080);
+		gr_def(ctx, 0x40083c, 0xfff70090);
+		gr_def(ctx, 0x400840, 0xffe806a8);
+	}
+	gr_def(ctx, 0x400844, 0x00000002);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		gr_def(ctx, 0x400894, 0x00001000);
+	gr_def(ctx, 0x4008e8, 0x00000003);
+	gr_def(ctx, 0x4008ec, 0x00001000);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x400908, 0xb);
+	else if (dev_priv->chipset < 0xa0)
+		cp_ctx(ctx, 0x400908, 0xc);
+	else
+		cp_ctx(ctx, 0x400908, 0xe);
+
+	if (dev_priv->chipset >= 0xa0)
+		cp_ctx(ctx, 0x400b00, 0x1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		cp_ctx(ctx, 0x400b10, 0x1);
+		gr_def(ctx, 0x400b10, 0x0001629d);
+		cp_ctx(ctx, 0x400b20, 0x1);
+		gr_def(ctx, 0x400b20, 0x0001629d);
+	}
+
+	/* 0C00 */
+	cp_ctx(ctx, 0x400c08, 0x2);
+	gr_def(ctx, 0x400c08, 0x0000fe0c);
+
+	/* 1000 */
+	if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x401008, 0x4);
+		gr_def(ctx, 0x401014, 0x00001000);
+	} else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) {
+		cp_ctx(ctx, 0x401008, 0x5);
+		gr_def(ctx, 0x401018, 0x00001000);
+	} else {
+		cp_ctx(ctx, 0x401008, 0x5);
+		gr_def(ctx, 0x401018, 0x00004000);
+	}
+
+	/* 1400 */
+	cp_ctx(ctx, 0x401400, 0x8);
+	cp_ctx(ctx, 0x401424, 0x3);
+	if (dev_priv->chipset == 0x50)
+		gr_def(ctx, 0x40142c, 0x0001fd87);
+	else
+		gr_def(ctx, 0x40142c, 0x00000187);
+	cp_ctx(ctx, 0x401540, 0x5);
+	gr_def(ctx, 0x401550, 0x00001018);
+
+	/* 1800 */
+	cp_ctx(ctx, 0x401814, 0x1);
+	gr_def(ctx, 0x401814, 0x000000ff);
+	if (dev_priv->chipset == 0x50) {
+		cp_ctx(ctx, 0x40181c, 0xe);
+		gr_def(ctx, 0x401850, 0x00000004);
+	} else if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x40181c, 0xf);
+		gr_def(ctx, 0x401854, 0x00000004);
+	} else {
+		cp_ctx(ctx, 0x40181c, 0x13);
+		gr_def(ctx, 0x401864, 0x00000004);
+	}
+
+	/* 1C00 */
+	cp_ctx(ctx, 0x401c00, 0x1);
+	switch (dev_priv->chipset) {
+	case 0x50:
+		gr_def(ctx, 0x401c00, 0x0001005f);
+		break;
+	case 0x84:
+	case 0x86:
+	case 0x94:
+		gr_def(ctx, 0x401c00, 0x044d00df);
+		break;
+	case 0x92:
+	case 0x96:
+	case 0x98:
+	case 0xa0:
+	case 0xaa:
+	case 0xac:
+		gr_def(ctx, 0x401c00, 0x042500df);
+		break;
+	case 0xa5:
+	case 0xa8:
+		gr_def(ctx, 0x401c00, 0x142500df);
+		break;
+	}
+
+	/* 2400 */
+	cp_ctx(ctx, 0x402400, 0x1);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x402408, 0x1);
+	else
+		cp_ctx(ctx, 0x402408, 0x2);
+	gr_def(ctx, 0x402408, 0x00000600);
+
+	/* 2800 */
+	cp_ctx(ctx, 0x402800, 0x1);
+	if (dev_priv->chipset == 0x50)
+		gr_def(ctx, 0x402800, 0x00000006);
+
+	/* 2C00 */
+	cp_ctx(ctx, 0x402c08, 0x6);
+	if (dev_priv->chipset != 0x50)
+		gr_def(ctx, 0x402c14, 0x01000000);
+	gr_def(ctx, 0x402c18, 0x000000ff);
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x402ca0, 0x1);
+	else
+		cp_ctx(ctx, 0x402ca0, 0x2);
+	if (dev_priv->chipset < 0xa0)
+		gr_def(ctx, 0x402ca0, 0x00000400);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		gr_def(ctx, 0x402ca0, 0x00000800);
+	else
+		gr_def(ctx, 0x402ca0, 0x00000400);
+	cp_ctx(ctx, 0x402cac, 0x4);
+
+	/* 3000 */
+	cp_ctx(ctx, 0x403004, 0x1);
+	gr_def(ctx, 0x403004, 0x00000001);
+
+	/* 3404 */
+	if (dev_priv->chipset >= 0xa0) {
+		cp_ctx(ctx, 0x403404, 0x1);
+		gr_def(ctx, 0x403404, 0x00000001);
+	}
+
+	/* 5000 */
+	cp_ctx(ctx, 0x405000, 0x1);
+	switch (dev_priv->chipset) {
+	case 0x50:
+		gr_def(ctx, 0x405000, 0x00300080);
+		break;
+	case 0x84:
+	case 0xa0:
+	case 0xa5:
+	case 0xa8:
+	case 0xaa:
+	case 0xac:
+		gr_def(ctx, 0x405000, 0x000e0080);
+		break;
+	case 0x86:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+	case 0x98:
+		gr_def(ctx, 0x405000, 0x00000080);
+		break;
+	}
+	cp_ctx(ctx, 0x405014, 0x1);
+	gr_def(ctx, 0x405014, 0x00000004);
+	cp_ctx(ctx, 0x40501c, 0x1);
+	cp_ctx(ctx, 0x405024, 0x1);
+	cp_ctx(ctx, 0x40502c, 0x1);
+
+	/* 5400 or maybe 4800 */
+	if (dev_priv->chipset == 0x50) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xea);
+	} else if (dev_priv->chipset < 0x94) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xcb);
+	} else if (dev_priv->chipset < 0xa0) {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xcc);
+	} else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		offset = 0x404800;
+		cp_ctx(ctx, 0x404800, 0xda);
+	} else {
+		offset = 0x405400;
+		cp_ctx(ctx, 0x405400, 0xd4);
+	}
+	gr_def(ctx, offset + 0x0c, 0x00000002);
+	gr_def(ctx, offset + 0x10, 0x00000001);
+	if (dev_priv->chipset >= 0x94)
+		offset += 4;
+	gr_def(ctx, offset + 0x1c, 0x00000001);
+	gr_def(ctx, offset + 0x20, 0x00000100);
+	gr_def(ctx, offset + 0x38, 0x00000002);
+	gr_def(ctx, offset + 0x3c, 0x00000001);
+	gr_def(ctx, offset + 0x40, 0x00000001);
+	gr_def(ctx, offset + 0x50, 0x00000001);
+	gr_def(ctx, offset + 0x54, 0x003fffff);
+	gr_def(ctx, offset + 0x58, 0x00001fff);
+	gr_def(ctx, offset + 0x60, 0x00000001);
+	gr_def(ctx, offset + 0x64, 0x00000001);
+	gr_def(ctx, offset + 0x6c, 0x00000001);
+	gr_def(ctx, offset + 0x70, 0x00000001);
+	gr_def(ctx, offset + 0x74, 0x00000001);
+	gr_def(ctx, offset + 0x78, 0x00000004);
+	gr_def(ctx, offset + 0x7c, 0x00000001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x80, 0x00000001);
+	gr_def(ctx, offset + 0x84, 0x00000001);
+	gr_def(ctx, offset + 0x88, 0x00000007);
+	gr_def(ctx, offset + 0x8c, 0x00000001);
+	gr_def(ctx, offset + 0x90, 0x00000007);
+	gr_def(ctx, offset + 0x94, 0x00000001);
+	gr_def(ctx, offset + 0x98, 0x00000001);
+	gr_def(ctx, offset + 0x9c, 0x00000001);
+	if (dev_priv->chipset == 0x50) {
+		 gr_def(ctx, offset + 0xb0, 0x00000001);
+		 gr_def(ctx, offset + 0xb4, 0x00000001);
+		 gr_def(ctx, offset + 0xbc, 0x00000001);
+		 gr_def(ctx, offset + 0xc0, 0x0000000a);
+		 gr_def(ctx, offset + 0xd0, 0x00000040);
+		 gr_def(ctx, offset + 0xd8, 0x00000002);
+		 gr_def(ctx, offset + 0xdc, 0x00000100);
+		 gr_def(ctx, offset + 0xe0, 0x00000001);
+		 gr_def(ctx, offset + 0xe4, 0x00000100);
+		 gr_def(ctx, offset + 0x100, 0x00000001);
+		 gr_def(ctx, offset + 0x124, 0x00000004);
+		 gr_def(ctx, offset + 0x13c, 0x00000001);
+		 gr_def(ctx, offset + 0x140, 0x00000100);
+		 gr_def(ctx, offset + 0x148, 0x00000001);
+		 gr_def(ctx, offset + 0x154, 0x00000100);
+		 gr_def(ctx, offset + 0x158, 0x00000001);
+		 gr_def(ctx, offset + 0x15c, 0x00000100);
+		 gr_def(ctx, offset + 0x164, 0x00000001);
+		 gr_def(ctx, offset + 0x170, 0x00000100);
+		 gr_def(ctx, offset + 0x174, 0x00000001);
+		 gr_def(ctx, offset + 0x17c, 0x00000001);
+		 gr_def(ctx, offset + 0x188, 0x00000002);
+		 gr_def(ctx, offset + 0x190, 0x00000001);
+		 gr_def(ctx, offset + 0x198, 0x00000001);
+		 gr_def(ctx, offset + 0x1ac, 0x00000003);
+		 offset += 0xd0;
+	} else {
+		gr_def(ctx, offset + 0xb0, 0x00000001);
+		gr_def(ctx, offset + 0xb4, 0x00000100);
+		gr_def(ctx, offset + 0xbc, 0x00000001);
+		gr_def(ctx, offset + 0xc8, 0x00000100);
+		gr_def(ctx, offset + 0xcc, 0x00000001);
+		gr_def(ctx, offset + 0xd0, 0x00000100);
+		gr_def(ctx, offset + 0xd8, 0x00000001);
+		gr_def(ctx, offset + 0xe4, 0x00000100);
+	}
+	gr_def(ctx, offset + 0xf8, 0x00000004);
+	gr_def(ctx, offset + 0xfc, 0x00000070);
+	gr_def(ctx, offset + 0x100, 0x00000080);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x114, 0x0000000c);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x11c, 0x00000008);
+	gr_def(ctx, offset + 0x120, 0x00000014);
+	if (dev_priv->chipset == 0x50) {
+		gr_def(ctx, offset + 0x124, 0x00000026);
+		offset -= 0x18;
+	} else {
+		gr_def(ctx, offset + 0x128, 0x00000029);
+		gr_def(ctx, offset + 0x12c, 0x00000027);
+		gr_def(ctx, offset + 0x130, 0x00000026);
+		gr_def(ctx, offset + 0x134, 0x00000008);
+		gr_def(ctx, offset + 0x138, 0x00000004);
+		gr_def(ctx, offset + 0x13c, 0x00000027);
+	}
+	gr_def(ctx, offset + 0x148, 0x00000001);
+	gr_def(ctx, offset + 0x14c, 0x00000002);
+	gr_def(ctx, offset + 0x150, 0x00000003);
+	gr_def(ctx, offset + 0x154, 0x00000004);
+	gr_def(ctx, offset + 0x158, 0x00000005);
+	gr_def(ctx, offset + 0x15c, 0x00000006);
+	gr_def(ctx, offset + 0x160, 0x00000007);
+	gr_def(ctx, offset + 0x164, 0x00000001);
+	gr_def(ctx, offset + 0x1a8, 0x000000cf);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x1d8, 0x00000080);
+	gr_def(ctx, offset + 0x1dc, 0x00000004);
+	gr_def(ctx, offset + 0x1e0, 0x00000004);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	else
+		gr_def(ctx, offset + 0x1e4, 0x00000003);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		gr_def(ctx, offset + 0x1ec, 0x00000003);
+		offset += 8;
+	}
+	gr_def(ctx, offset + 0x1e8, 0x00000001);
+	if (dev_priv->chipset == 0x50)
+		offset -= 4;
+	gr_def(ctx, offset + 0x1f4, 0x00000012);
+	gr_def(ctx, offset + 0x1f8, 0x00000010);
+	gr_def(ctx, offset + 0x1fc, 0x0000000c);
+	gr_def(ctx, offset + 0x200, 0x00000001);
+	gr_def(ctx, offset + 0x210, 0x00000004);
+	gr_def(ctx, offset + 0x214, 0x00000002);
+	gr_def(ctx, offset + 0x218, 0x00000004);
+	if (dev_priv->chipset >= 0xa0)
+		offset += 4;
+	gr_def(ctx, offset + 0x224, 0x003fffff);
+	gr_def(ctx, offset + 0x228, 0x00001fff);
+	if (dev_priv->chipset == 0x50)
+		offset -= 0x20;
+	else if (dev_priv->chipset >= 0xa0) {
+		gr_def(ctx, offset + 0x250, 0x00000001);
+		gr_def(ctx, offset + 0x254, 0x00000001);
+		gr_def(ctx, offset + 0x258, 0x00000002);
+		offset += 0x10;
+	}
+	gr_def(ctx, offset + 0x250, 0x00000004);
+	gr_def(ctx, offset + 0x254, 0x00000014);
+	gr_def(ctx, offset + 0x258, 0x00000001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		offset += 4;
+	gr_def(ctx, offset + 0x264, 0x00000002);
+	if (dev_priv->chipset >= 0xa0)
+		offset += 8;
+	gr_def(ctx, offset + 0x270, 0x00000001);
+	gr_def(ctx, offset + 0x278, 0x00000002);
+	gr_def(ctx, offset + 0x27c, 0x00001000);
+	if (dev_priv->chipset == 0x50)
+		offset -= 0xc;
+	else {
+		gr_def(ctx, offset + 0x280, 0x00000e00);
+		gr_def(ctx, offset + 0x284, 0x00001000);
+		gr_def(ctx, offset + 0x288, 0x00001e00);
+	}
+	gr_def(ctx, offset + 0x290, 0x00000001);
+	gr_def(ctx, offset + 0x294, 0x00000001);
+	gr_def(ctx, offset + 0x298, 0x00000001);
+	gr_def(ctx, offset + 0x29c, 0x00000001);
+	gr_def(ctx, offset + 0x2a0, 0x00000001);
+	gr_def(ctx, offset + 0x2b0, 0x00000200);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		gr_def(ctx, offset + 0x2b4, 0x00000200);
+		offset += 4;
+	}
+	if (dev_priv->chipset < 0xa0) {
+		gr_def(ctx, offset + 0x2b8, 0x00000001);
+		gr_def(ctx, offset + 0x2bc, 0x00000070);
+		gr_def(ctx, offset + 0x2c0, 0x00000080);
+		gr_def(ctx, offset + 0x2cc, 0x00000001);
+		gr_def(ctx, offset + 0x2d0, 0x00000070);
+		gr_def(ctx, offset + 0x2d4, 0x00000080);
+	} else {
+		gr_def(ctx, offset + 0x2b8, 0x00000001);
+		gr_def(ctx, offset + 0x2bc, 0x000000f0);
+		gr_def(ctx, offset + 0x2c0, 0x000000ff);
+		gr_def(ctx, offset + 0x2cc, 0x00000001);
+		gr_def(ctx, offset + 0x2d0, 0x000000f0);
+		gr_def(ctx, offset + 0x2d4, 0x000000ff);
+		gr_def(ctx, offset + 0x2dc, 0x00000009);
+		offset += 4;
+	}
+	gr_def(ctx, offset + 0x2e4, 0x00000001);
+	gr_def(ctx, offset + 0x2e8, 0x000000cf);
+	gr_def(ctx, offset + 0x2f0, 0x00000001);
+	gr_def(ctx, offset + 0x300, 0x000000cf);
+	gr_def(ctx, offset + 0x308, 0x00000002);
+	gr_def(ctx, offset + 0x310, 0x00000001);
+	gr_def(ctx, offset + 0x318, 0x00000001);
+	gr_def(ctx, offset + 0x320, 0x000000cf);
+	gr_def(ctx, offset + 0x324, 0x000000cf);
+	gr_def(ctx, offset + 0x328, 0x00000001);
+
+	/* 6000? */
+	if (dev_priv->chipset == 0x50)
+		cp_ctx(ctx, 0x4063e0, 0x1);
+
+	/* 6800 */
+	if (dev_priv->chipset < 0x90) {
+		cp_ctx(ctx, 0x406814, 0x2b);
+		gr_def(ctx, 0x406818, 0x00000f80);
+		gr_def(ctx, 0x406860, 0x007f0080);
+		gr_def(ctx, 0x40689c, 0x007f0080);
+	} else {
+		cp_ctx(ctx, 0x406814, 0x4);
+		if (dev_priv->chipset == 0x98)
+			gr_def(ctx, 0x406818, 0x00000f80);
+		else
+			gr_def(ctx, 0x406818, 0x00001f80);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			gr_def(ctx, 0x40681c, 0x00000030);
+		cp_ctx(ctx, 0x406830, 0x3);
+	}
+
+	/* 7000: per-ROP group state */
+	for (i = 0; i < 8; i++) {
+		if (units & (1<<(i+16))) {
+			cp_ctx(ctx, 0x407000 + (i<<8), 3);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, 0x407000 + (i<<8), 0x1b74f820);
+			else if (dev_priv->chipset != 0xa5)
+				gr_def(ctx, 0x407000 + (i<<8), 0x3b74f821);
+			else
+				gr_def(ctx, 0x407000 + (i<<8), 0x7b74f821);
+			gr_def(ctx, 0x407004 + (i<<8), 0x89058001);
+
+			if (dev_priv->chipset == 0x50) {
+				cp_ctx(ctx, 0x407010 + (i<<8), 1);
+			} else if (dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, 0x407010 + (i<<8), 2);
+				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
+				gr_def(ctx, 0x407014 + (i<<8), 0x0000001f);
+			} else {
+				cp_ctx(ctx, 0x407010 + (i<<8), 3);
+				gr_def(ctx, 0x407010 + (i<<8), 0x00001000);
+				if (dev_priv->chipset != 0xa5)
+					gr_def(ctx, 0x407014 + (i<<8), 0x000000ff);
+				else
+					gr_def(ctx, 0x407014 + (i<<8), 0x000001ff);
+			}
+
+			cp_ctx(ctx, 0x407080 + (i<<8), 4);
+			if (dev_priv->chipset != 0xa5)
+				gr_def(ctx, 0x407080 + (i<<8), 0x027c10fa);
+			else
+				gr_def(ctx, 0x407080 + (i<<8), 0x827c10fa);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, 0x407084 + (i<<8), 0x000000c0);
+			else
+				gr_def(ctx, 0x407084 + (i<<8), 0x400000c0);
+			gr_def(ctx, 0x407088 + (i<<8), 0xb7892080);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, 0x407094 + (i<<8), 1);
+			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+				cp_ctx(ctx, 0x407094 + (i<<8), 3);
+			else {
+				cp_ctx(ctx, 0x407094 + (i<<8), 4);
+				gr_def(ctx, 0x4070a0 + (i<<8), 1);
+			}
+		}
+	}
+
+	cp_ctx(ctx, 0x407c00, 0x3);
+	if (dev_priv->chipset < 0x90)
+		gr_def(ctx, 0x407c00, 0x00010040);
+	else if (dev_priv->chipset < 0xa0)
+		gr_def(ctx, 0x407c00, 0x00390040);
+	else
+		gr_def(ctx, 0x407c00, 0x003d0040);
+	gr_def(ctx, 0x407c08, 0x00000022);
+	if (dev_priv->chipset >= 0xa0) {
+		cp_ctx(ctx, 0x407c10, 0x3);
+		cp_ctx(ctx, 0x407c20, 0x1);
+		cp_ctx(ctx, 0x407c2c, 0x1);
+	}
+
+	if (dev_priv->chipset < 0xa0) {
+		cp_ctx(ctx, 0x407d00, 0x9);
+	} else {
+		cp_ctx(ctx, 0x407d00, 0x15);
+	}
+	if (dev_priv->chipset == 0x98)
+		gr_def(ctx, 0x407d08, 0x00380040);
+	else {
+		if (dev_priv->chipset < 0x90)
+			gr_def(ctx, 0x407d08, 0x00010040);
+		else if (dev_priv->chipset < 0xa0)
+			gr_def(ctx, 0x407d08, 0x00390040);
+		else
+			gr_def(ctx, 0x407d08, 0x003d0040);
+		gr_def(ctx, 0x407d0c, 0x00000022);
+	}
+
+	/* 8000+: per-TP state */
+	for (i = 0; i < 10; i++) {
+		if (units & (1<<i)) {
+			if (dev_priv->chipset < 0xa0)
+				base = 0x408000 + (i<<12);
+			else
+				base = 0x408000 + (i<<11);
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xc00;
+			else
+				offset = base + 0x80;
+			cp_ctx(ctx, offset + 0x00, 1);
+			gr_def(ctx, offset + 0x00, 0x0000ff0a);
+			cp_ctx(ctx, offset + 0x08, 1);
+
+			/* per-MP state */
+			for (j = 0; j < (dev_priv->chipset < 0xa0 ? 2 : 4); j++) {
+				if (!(units & (1 << (j+24)))) continue;
+				if (dev_priv->chipset < 0xa0)
+					offset = base + 0x200 + (j<<7);
+				else
+					offset = base + 0x100 + (j<<7);
+				cp_ctx(ctx, offset, 0x20);
+				gr_def(ctx, offset + 0x00, 0x01800000);
+				gr_def(ctx, offset + 0x04, 0x00160000);
+				gr_def(ctx, offset + 0x08, 0x01800000);
+				gr_def(ctx, offset + 0x18, 0x0003ffff);
+				switch (dev_priv->chipset) {
+				case 0x50:
+					gr_def(ctx, offset + 0x1c, 0x00080000);
+					break;
+				case 0x84:
+					gr_def(ctx, offset + 0x1c, 0x00880000);
+					break;
+				case 0x86:
+					gr_def(ctx, offset + 0x1c, 0x008c0000);
+					break;
+				case 0x92:
+				case 0x96:
+				case 0x98:
+					gr_def(ctx, offset + 0x1c, 0x118c0000);
+					break;
+				case 0x94:
+					gr_def(ctx, offset + 0x1c, 0x10880000);
+					break;
+				case 0xa0:
+				case 0xa5:
+					gr_def(ctx, offset + 0x1c, 0x310c0000);
+					break;
+				case 0xa8:
+				case 0xaa:
+				case 0xac:
+					gr_def(ctx, offset + 0x1c, 0x300c0000);
+					break;
+				}
+				gr_def(ctx, offset + 0x40, 0x00010401);
+				if (dev_priv->chipset == 0x50)
+					gr_def(ctx, offset + 0x48, 0x00000040);
+				else
+					gr_def(ctx, offset + 0x48, 0x00000078);
+				gr_def(ctx, offset + 0x50, 0x000000bf);
+				gr_def(ctx, offset + 0x58, 0x00001210);
+				if (dev_priv->chipset == 0x50)
+					gr_def(ctx, offset + 0x5c, 0x00000080);
+				else
+					gr_def(ctx, offset + 0x5c, 0x08000080);
+				if (dev_priv->chipset >= 0xa0)
+					gr_def(ctx, offset + 0x68, 0x0000003e);
+			}
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x300, 0x4);
+			else
+				cp_ctx(ctx, base + 0x300, 0x5);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, base + 0x304, 0x00007070);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, base + 0x304, 0x00027070);
+			else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+				gr_def(ctx, base + 0x304, 0x01127070);
+			else
+				gr_def(ctx, base + 0x304, 0x05127070);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x318, 1);
+			else
+				cp_ctx(ctx, base + 0x320, 1);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, base + 0x318, 0x0003ffff);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, base + 0x318, 0x03ffffff);
+			else
+				gr_def(ctx, base + 0x320, 0x07ffffff);
+
+			if (dev_priv->chipset < 0xa0)
+				cp_ctx(ctx, base + 0x324, 5);
+			else
+				cp_ctx(ctx, base + 0x328, 4);
+
+			if (dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, base + 0x340, 9);
+				offset = base + 0x340;
+			} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+				cp_ctx(ctx, base + 0x33c, 0xb);
+				offset = base + 0x344;
+			} else {
+				cp_ctx(ctx, base + 0x33c, 0xd);
+				offset = base + 0x344;
+			}
+			gr_def(ctx, offset + 0x0, 0x00120407);
+			gr_def(ctx, offset + 0x4, 0x05091507);
+			if (dev_priv->chipset == 0x84)
+				gr_def(ctx, offset + 0x8, 0x05100202);
+			else
+				gr_def(ctx, offset + 0x8, 0x05010202);
+			gr_def(ctx, offset + 0xc, 0x00030201);
+
+			cp_ctx(ctx, base + 0x400, 2);
+			gr_def(ctx, base + 0x404, 0x00000040);
+			cp_ctx(ctx, base + 0x40c, 2);
+			gr_def(ctx, base + 0x40c, 0x0d0c0b0a);
+			gr_def(ctx, base + 0x410, 0x00141210);
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0x800;
+			else
+				offset = base + 0x500;
+			cp_ctx(ctx, offset, 6);
+			gr_def(ctx, offset + 0x0, 0x000001f0);
+			gr_def(ctx, offset + 0x4, 0x00000001);
+			gr_def(ctx, offset + 0x8, 0x00000003);
+			if (dev_priv->chipset == 0x50 || dev_priv->chipset >= 0xaa)
+				gr_def(ctx, offset + 0xc, 0x00008000);
+			gr_def(ctx, offset + 0x14, 0x00039e00);
+			cp_ctx(ctx, offset + 0x1c, 2);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, offset + 0x1c, 0x00000040);
+			else
+				gr_def(ctx, offset + 0x1c, 0x00000100);
+			gr_def(ctx, offset + 0x20, 0x00003800);
+
+			if (dev_priv->chipset >= 0xa0) {
+				cp_ctx(ctx, base + 0x54c, 2);
+				if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa)
+					gr_def(ctx, base + 0x54c, 0x003fe006);
+				else
+					gr_def(ctx, base + 0x54c, 0x003fe007);
+				gr_def(ctx, base + 0x550, 0x003fe000);
+			}
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xa00;
+			else
+				offset = base + 0x680;
+			cp_ctx(ctx, offset, 1);
+			gr_def(ctx, offset, 0x00404040);
+
+			if (dev_priv->chipset < 0xa0)
+				offset = base + 0xe00;
+			else
+				offset = base + 0x700;
+			cp_ctx(ctx, offset, 2);
+			if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, offset, 0x0077f005);
+			else if (dev_priv->chipset == 0xa5)
+				gr_def(ctx, offset, 0x6cf7f007);
+			else if (dev_priv->chipset == 0xa8)
+				gr_def(ctx, offset, 0x6cfff007);
+			else if (dev_priv->chipset == 0xac)
+				gr_def(ctx, offset, 0x0cfff007);
+			else
+				gr_def(ctx, offset, 0x0cf7f007);
+			if (dev_priv->chipset == 0x50)
+				gr_def(ctx, offset + 0x4, 0x00007fff);
+			else if (dev_priv->chipset < 0xa0)
+				gr_def(ctx, offset + 0x4, 0x003f7fff);
+			else
+				gr_def(ctx, offset + 0x4, 0x02bf7fff);
+			cp_ctx(ctx, offset + 0x2c, 1);
+			if (dev_priv->chipset == 0x50) {
+				cp_ctx(ctx, offset + 0x50, 9);
+				gr_def(ctx, offset + 0x54, 0x000003ff);
+				gr_def(ctx, offset + 0x58, 0x00000003);
+				gr_def(ctx, offset + 0x5c, 0x00000003);
+				gr_def(ctx, offset + 0x60, 0x000001ff);
+				gr_def(ctx, offset + 0x64, 0x0000001f);
+				gr_def(ctx, offset + 0x68, 0x0000000f);
+				gr_def(ctx, offset + 0x6c, 0x0000000f);
+			} else if(dev_priv->chipset < 0xa0) {
+				cp_ctx(ctx, offset + 0x50, 1);
+				cp_ctx(ctx, offset + 0x70, 1);
+			} else {
+				cp_ctx(ctx, offset + 0x50, 1);
+				cp_ctx(ctx, offset + 0x60, 5);
+			}
+		}
+	}
+}
+
+/*
+ * xfer areas. These are a pain.
+ *
+ * There are 2 xfer areas: the first one is big and contains all sorts of
+ * stuff, the second is small and contains some per-TP context.
+ *
+ * Each area is split into 8 "strands". The areas, when saved to grctx,
+ * are made of 8-word blocks. Each block contains a single word from
+ * each strand. The strands are independent of each other, their
+ * addresses are unrelated to each other, and data in them is closely
+ * packed together. The strand layout varies a bit between cards: here
+ * and there, a single word is thrown out in the middle and the whole
+ * strand is offset by a bit from corresponding one on another chipset.
+ * For this reason, addresses of stuff in strands are almost useless.
+ * Knowing sequence of stuff and size of gaps between them is much more
+ * useful, and that's how we build the strands in our generator.
+ *
+ * NVA0 takes this mess to a whole new level by cutting the old strands
+ * into a few dozen pieces [known as genes], rearranging them randomly,
+ * and putting them back together to make new strands. Hopefully these
+ * genes correspond more or less directly to the same PGRAPH subunits
+ * as in 400040 register.
+ *
+ * The most common value in default context is 0, and when the genes
+ * are separated by 0's, gene bounduaries are quite speculative...
+ * some of them can be clearly deduced, others can be guessed, and yet
+ * others won't be resolved without figuring out the real meaning of
+ * given ctxval. For the same reason, ending point of each strand
+ * is unknown. Except for strand 0, which is the longest strand and
+ * its end corresponds to end of the whole xfer.
+ *
+ * An unsolved mystery is the seek instruction: it takes an argument
+ * in bits 8-18, and that argument is clearly the place in strands to
+ * seek to... but the offsets don't seem to correspond to offsets as
+ * seen in grctx. Perhaps there's another, real, not randomly-changing
+ * addressing in strands, and the xfer insn just happens to skip over
+ * the unused bits? NV10-NV30 PIPE comes to mind...
+ *
+ * As far as I know, there's no way to access the xfer areas directly
+ * without the help of ctxprog.
+ */
+
+static inline void
+xf_emit(struct nouveau_grctx *ctx, int num, uint32_t val) {
+	int i;
+	if (val && ctx->mode == NOUVEAU_GRCTX_VALS)
+		for (i = 0; i < num; i++)
+			nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val);
+	ctx->ctxvals_pos += num << 3;
+}
+
+/* Gene declarations... */
+
+static void nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx);
+static void nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx);
+
+static void
+nv50_graph_construct_xfer1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+	int offset;
+	int size = 0;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+
+	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	ctx->ctxvals_base = offset;
+
+	if (dev_priv->chipset < 0xa0) {
+		/* Strand 0 */
+		ctx->ctxvals_pos = offset;
+		switch (dev_priv->chipset) {
+		case 0x50:
+			xf_emit(ctx, 0x99, 0);
+			break;
+		case 0x84:
+		case 0x86:
+			xf_emit(ctx, 0x384, 0);
+			break;
+		case 0x92:
+		case 0x94:
+		case 0x96:
+		case 0x98:
+			xf_emit(ctx, 0x380, 0);
+			break;
+		}
+		nv50_graph_construct_gene_m2mf (ctx);
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x84:
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0x4c4, 0);
+			break;
+		case 0x92:
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0x984, 0);
+			break;
+		}
+		nv50_graph_construct_gene_unk5(ctx);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 0xa, 0);
+		else
+			xf_emit(ctx, 0xb, 0);
+		nv50_graph_construct_gene_unk4(ctx);
+		nv50_graph_construct_gene_unk3(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 1 */
+		ctx->ctxvals_pos = offset + 0x1;
+		nv50_graph_construct_gene_unk6(ctx);
+		nv50_graph_construct_gene_unk7(ctx);
+		nv50_graph_construct_gene_unk8(ctx);
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x92:
+			xf_emit(ctx, 0xfb, 0);
+			break;
+		case 0x84:
+			xf_emit(ctx, 0xd3, 0);
+			break;
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0xab, 0);
+			break;
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0x6b, 0);
+			break;
+		}
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 4, 0);
+		xf_emit(ctx, 1, 0x0fac6881);
+		xf_emit(ctx, 0xb, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 2 */
+		ctx->ctxvals_pos = offset + 0x2;
+		switch (dev_priv->chipset) {
+		case 0x50:
+		case 0x92:
+			xf_emit(ctx, 0xa80, 0);
+			break;
+		case 0x84:
+			xf_emit(ctx, 0xa7e, 0);
+			break;
+		case 0x94:
+		case 0x96:
+			xf_emit(ctx, 0xa7c, 0);
+			break;
+		case 0x86:
+		case 0x98:
+			xf_emit(ctx, 0xa7a, 0);
+			break;
+		}
+		xf_emit(ctx, 1, 0x3fffff);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1fff);
+		xf_emit(ctx, 0xe, 0);
+		nv50_graph_construct_gene_unk9(ctx);
+		nv50_graph_construct_gene_unk2(ctx);
+		nv50_graph_construct_gene_unk1(ctx);
+		nv50_graph_construct_gene_unk10(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 3: per-ROP group state */
+		ctx->ctxvals_pos = offset + 3;
+		for (i = 0; i < 6; i++)
+			if (units & (1 << (i + 16)))
+				nv50_graph_construct_gene_ropc(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strands 4-7: per-TP state */
+		for (i = 0; i < 4; i++) {
+			ctx->ctxvals_pos = offset + 4 + i;
+			if (units & (1 << (2 * i)))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << (2 * i + 1)))
+				nv50_graph_construct_xfer_tp(ctx);
+			if ((ctx->ctxvals_pos-offset)/8 > size)
+				size = (ctx->ctxvals_pos-offset)/8;
+		}
+	} else {
+		/* Strand 0 */
+		ctx->ctxvals_pos = offset;
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0x385, 0);
+		else
+			xf_emit(ctx, 0x384, 0);
+		nv50_graph_construct_gene_m2mf(ctx);
+		xf_emit(ctx, 0x950, 0);
+		nv50_graph_construct_gene_unk10(ctx);
+		xf_emit(ctx, 1, 0x0fac6881);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+			xf_emit(ctx, 1, 1);
+			xf_emit(ctx, 3, 0);
+		}
+		nv50_graph_construct_gene_unk8(ctx);
+		if (dev_priv->chipset == 0xa0)
+			xf_emit(ctx, 0x189, 0);
+		else if (dev_priv->chipset < 0xa8)
+			xf_emit(ctx, 0x99, 0);
+		else if (dev_priv->chipset == 0xaa)
+			xf_emit(ctx, 0x65, 0);
+		else
+			xf_emit(ctx, 0x6d, 0);
+		nv50_graph_construct_gene_unk9(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 1 */
+		ctx->ctxvals_pos = offset + 1;
+		nv50_graph_construct_gene_unk1(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 2 */
+		ctx->ctxvals_pos = offset + 2;
+		if (dev_priv->chipset == 0xa0) {
+			nv50_graph_construct_gene_unk2(ctx);
+		}
+		xf_emit(ctx, 0x36, 0);
+		nv50_graph_construct_gene_unk5(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 3 */
+		ctx->ctxvals_pos = offset + 3;
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 1);
+		nv50_graph_construct_gene_unk6(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 4 */
+		ctx->ctxvals_pos = offset + 4;
+		if (dev_priv->chipset == 0xa0)
+			xf_emit(ctx, 0xa80, 0);
+		else
+			xf_emit(ctx, 0xa7a, 0);
+		xf_emit(ctx, 1, 0x3fffff);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1fff);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 5 */
+		ctx->ctxvals_pos = offset + 5;
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 0x0fac6881);
+		xf_emit(ctx, 0xb, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 3, 0);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 1, 0x11);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 2, 0x4e3bfdf);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 1, 0x11);
+		xf_emit(ctx, 1, 0);
+		for (i = 0; i < 8; i++)
+			if (units & (1<<(i+16)))
+				nv50_graph_construct_gene_ropc(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 6 */
+		ctx->ctxvals_pos = offset + 6;
+		nv50_graph_construct_gene_unk3(ctx);
+		xf_emit(ctx, 0xb, 0);
+		nv50_graph_construct_gene_unk4(ctx);
+		nv50_graph_construct_gene_unk7(ctx);
+		if (units & (1 << 0))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 1))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 2))
+			nv50_graph_construct_xfer_tp(ctx);
+		if (units & (1 << 3))
+			nv50_graph_construct_xfer_tp(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 7 */
+		ctx->ctxvals_pos = offset + 7;
+		if (dev_priv->chipset == 0xa0) {
+			if (units & (1 << 4))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 5))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 6))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 7))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 8))
+				nv50_graph_construct_xfer_tp(ctx);
+			if (units & (1 << 9))
+				nv50_graph_construct_xfer_tp(ctx);
+		} else {
+			nv50_graph_construct_gene_unk2(ctx);
+		}
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+	}
+
+	ctx->ctxvals_pos = offset + size * 8;
+	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	cp_lsr (ctx, offset);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, size);
+	cp_out (ctx, CP_SEEK_1);
+	cp_out (ctx, CP_XFER_1);
+	cp_wait(ctx, XFER, BUSY);
+}
+
+/*
+ * non-trivial demagiced parts of ctx init go here
+ */
+
+static void
+nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx)
+{
+	/* m2mf state */
+	xf_emit (ctx, 1, 0);		/* DMA_NOTIFY instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_IN instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* DMA_BUFFER_OUT instance >> 4 */
+	xf_emit (ctx, 1, 0);		/* OFFSET_IN */
+	xf_emit (ctx, 1, 0);		/* OFFSET_OUT */
+	xf_emit (ctx, 1, 0);		/* PITCH_IN */
+	xf_emit (ctx, 1, 0);		/* PITCH_OUT */
+	xf_emit (ctx, 1, 0);		/* LINE_LENGTH */
+	xf_emit (ctx, 1, 0);		/* LINE_COUNT */
+	xf_emit (ctx, 1, 0x21);		/* FORMAT: bits 0-4 INPUT_INC, bits 5-9 OUTPUT_INC */
+	xf_emit (ctx, 1, 1);		/* LINEAR_IN */
+	xf_emit (ctx, 1, 0x2);		/* TILING_MODE_IN: bits 0-2 y tiling, bits 3-5 z tiling */
+	xf_emit (ctx, 1, 0x100);	/* TILING_PITCH_IN */
+	xf_emit (ctx, 1, 0x100);	/* TILING_HEIGHT_IN */
+	xf_emit (ctx, 1, 1);		/* TILING_DEPTH_IN */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_IN_Z */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_IN */
+	xf_emit (ctx, 1, 1);		/* LINEAR_OUT */
+	xf_emit (ctx, 1, 0x2);		/* TILING_MODE_OUT: bits 0-2 y tiling, bits 3-5 z tiling */
+	xf_emit (ctx, 1, 0x100);	/* TILING_PITCH_OUT */
+	xf_emit (ctx, 1, 0x100);	/* TILING_HEIGHT_OUT */
+	xf_emit (ctx, 1, 1);		/* TILING_DEPTH_OUT */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_OUT_Z */
+	xf_emit (ctx, 1, 0);		/* TILING_POSITION_OUT */
+	xf_emit (ctx, 1, 0);		/* OFFSET_IN_HIGH */
+	xf_emit (ctx, 1, 0);		/* OFFSET_OUT_HIGH */
+}
+
+static void
+nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	switch (dev_priv->chipset) {
+	case 0x50:
+	case 0x86:
+	case 0x98:
+	case 0xaa:
+	case 0xac:
+		xf_emit(ctx, 0x542, 0);
+		break;
+	case 0x84:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+		xf_emit(ctx, 0x942, 0);
+		break;
+	case 0xa0:
+		xf_emit(ctx, 0x2042, 0);
+		break;
+	case 0xa5:
+	case 0xa8:
+		xf_emit(ctx, 0x842, 0);
+		break;
+	}
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x27);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 3, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx)
+{
+	/* end of area 2 on pre-NVA0, area 1 on NVAx */
+	xf_emit(ctx, 0x10, 0x04000000);
+	xf_emit(ctx, 0x24, 0);
+	xf_emit(ctx, 2, 0x04e3bfdf);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x1fe21);
+}
+
+static void
+nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 2 on pre-NVA0, beginning of area 2 on NVA0, area 7 on >NVA0 */
+	if (dev_priv->chipset != 0x50) {
+		xf_emit(ctx, 5, 0);
+		xf_emit(ctx, 1, 0x80c14);
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x804);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 2, 4);
+		xf_emit(ctx, 1, 0x8100c12);
+	}
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 3, 0);
+	else
+		xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x804);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x1a);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 0x7f);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 6, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 0x38, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x38, 0);
+	xf_emit(ctx, 2, 0x88);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 0x16, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x3f800000);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 0x28, 0);
+	else
+		xf_emit(ctx, 0x25, 0);
+	xf_emit(ctx, 1, 0x52);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x26);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* end of area 0 on pre-NVA0, beginning of area 6 on NVAx */
+	xf_emit(ctx, 1, 0x3f);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x10, 0);
+	else
+		xf_emit(ctx, 0x11, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x1001);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 0x20, 0);
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 1, 0x10);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0);
+	else
+		xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 2, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx)
+{
+	/* middle of area 0 on pre-NVA0, middle of area 6 on NVAx */
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 0 on pre-NVA0 [after m2mf], end of area 2 on NVAx */
+	xf_emit(ctx, 2, 4);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x1c4d, 0);
+	else
+		xf_emit(ctx, 0x1c4b, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x8100c12);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x80c14);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0x27);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x3c1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x16, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* beginning of area 1 on pre-NVA0 [after m2mf], area 3 on NVAx */
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 8, 0);
+	else
+		xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x20);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x11, 0);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 0xf, 0);
+	else
+		xf_emit(ctx, 0xe, 0);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 0xd, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 8);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	if (dev_priv->chipset == 0xa8)
+		xf_emit(ctx, 1, 0x1e00);
+	xf_emit(ctx, 0xc, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x125, 0);
+	else if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x126, 0);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		xf_emit(ctx, 0x124, 0);
+	else
+		xf_emit(ctx, 0x1f7, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 3, 0);
+	else
+		xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xa1, 0);
+	else
+		xf_emit(ctx, 0x5a, 0);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x834, 0);
+	else if (dev_priv->chipset == 0xa0)
+		xf_emit(ctx, 0x1873, 0);
+	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x8ba, 0);
+	else
+		xf_emit(ctx, 0x833, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 0xf, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 6 on NVAx */
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0x100);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 8);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 0xcf);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x15);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x4444480);
+	xf_emit(ctx, 0x37, 0);
+}
+
+static void
+nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx)
+{
+	/* middle of area 1 on pre-NVA0 [after m2mf], middle of area 0 on NVAx */
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 2);
+}
+
+static void
+nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	/* middle of area 2 on pre-NVA0 [after m2mf], end of area 0 on NVAx */
+	xf_emit(ctx, 1, 0x3f800000);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x1a);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x12, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 0xf, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 3);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 0x04000000);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 5);
+	xf_emit(ctx, 1, 0x52);
+	if (dev_priv->chipset == 0x50) {
+		xf_emit(ctx, 0x13, 0);
+	} else {
+		xf_emit(ctx, 4, 0);
+		xf_emit(ctx, 1, 1);
+		if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0x11, 0);
+		else
+			xf_emit(ctx, 0x10, 0);
+	}
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x26, 0);
+	xf_emit(ctx, 1, 0x8100c12);
+	xf_emit(ctx, 1, 5);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 4, 0xffff);
+	if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 3);
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x1f, 0);
+	else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xc, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 1, 0x1a);
+	if (dev_priv->chipset != 0x50) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 3);
+	}
+	if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0x26, 0);
+	else
+		xf_emit(ctx, 0x3c, 0);
+	xf_emit(ctx, 1, 0x102);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 4, 4);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0x3ff);
+	else
+		xf_emit(ctx, 1, 0x7ff);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x102);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 4, 4);
+	xf_emit(ctx, 0x2c, 0);
+}
+
+static void
+nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic2;
+	if (dev_priv->chipset == 0x50) {
+		magic2 = 0x00003e60;
+	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+		magic2 = 0x001ffe67;
+	} else {
+		magic2 = 0x00087e67;
+	}
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 7, 0);
+	if (dev_priv->chipset >= 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 0x15);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x92 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 1, 4);
+		xf_emit(ctx, 1, 0x400);
+		xf_emit(ctx, 1, 0x300);
+		xf_emit(ctx, 1, 0x1001);
+		if (dev_priv->chipset != 0xa0) {
+			if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+				xf_emit(ctx, 1, 0);
+			else
+				xf_emit(ctx, 1, 0x15);
+		}
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x13, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 0x10, 0);
+	xf_emit(ctx, 0x10, 0x3f800000);
+	xf_emit(ctx, 0x19, 0);
+	xf_emit(ctx, 1, 0x10);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x3f);
+	xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 2, 0);
+		xf_emit(ctx, 1, 0x1001);
+		xf_emit(ctx, 0xb, 0);
+	} else {
+		xf_emit(ctx, 0xc, 0);
+	}
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x11);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 6, 0);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 3, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 5, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 0x16, 0);
+	} else {
+		if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 0x1b, 0);
+		else
+			xf_emit(ctx, 0x15, 0);
+	}
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x5b, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x1(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic3;
+	if (dev_priv->chipset == 0x50)
+		magic3 = 0x1000;
+	else if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8)
+		magic3 = 0x1e00;
+	else
+		magic3 = 0;
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0x24, 0);
+	else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 0x14, 0);
+	else
+		xf_emit(ctx, 0x15, 0);
+	xf_emit(ctx, 2, 4);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 0x03020100);
+	else
+		xf_emit(ctx, 1, 0x00608080);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 2, 4);
+	xf_emit(ctx, 1, 0x80);
+	if (magic3)
+		xf_emit(ctx, 1, magic3);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 0x24, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x80);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0x03020100);
+	xf_emit(ctx, 1, 3);
+	if (magic3)
+		xf_emit(ctx, 1, magic3);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x94 || dev_priv->chipset == 0x96)
+		xf_emit(ctx, 0x1024, 0);
+	else if (dev_priv->chipset < 0xa0)
+		xf_emit(ctx, 0xa24, 0);
+	else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa)
+		xf_emit(ctx, 0x214, 0);
+	else
+		xf_emit(ctx, 0x414, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 2, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int magic1, magic2;
+	if (dev_priv->chipset == 0x50) {
+		magic1 = 0x3ff;
+		magic2 = 0x00003e60;
+	} else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) {
+		magic1 = 0x7ff;
+		magic2 = 0x001ffe67;
+	} else {
+		magic1 = 0x7ff;
+		magic2 = 0x00087e67;
+	}
+	xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xc, 0);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 0xb, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 3);
+		xf_emit(ctx, 1, 0);
+	} else if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 8, 1);
+		xf_emit(ctx, 1, 0);
+	}
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 3, 0xcf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 8, 1);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x11);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	if(dev_priv->chipset == 0x50)
+		xf_emit(ctx, 1, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, magic1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x28, 0);
+	xf_emit(ctx, 8, 8);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 8, 0x400);
+	xf_emit(ctx, 8, 0x300);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x20);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x40);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 4, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x400);
+	xf_emit(ctx, 1, 0x300);
+	xf_emit(ctx, 1, 0x1001);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 4, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, 0xf);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 0x15, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 3, 0);
+	} else
+		xf_emit(ctx, 0x17, 0);
+	if (dev_priv->chipset >= 0xa0)
+		xf_emit(ctx, 1, 0x0fac6881);
+	xf_emit(ctx, 1, magic2);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 3, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 0);
+	else if (dev_priv->chipset != 0x50)
+		xf_emit(ctx, 1, 0);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x3(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 2, 0);
+	else
+		xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0x2a712488);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x4085c000);
+	xf_emit(ctx, 1, 0x40);
+	xf_emit(ctx, 1, 0x100);
+	xf_emit(ctx, 1, 0x10100);
+	xf_emit(ctx, 1, 0x02800000);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x4(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 2, 0x04e3bfdf);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 2, 1);
+	else
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 0x00ffff00);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0x30201000);
+	xf_emit(ctx, 1, 0x70605040);
+	xf_emit(ctx, 1, 0xb8a89888);
+	xf_emit(ctx, 1, 0xf8e8d8c8);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1a);
+}
+
+static void
+nv50_graph_construct_xfer_tp_x5(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 2, 0);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 0xb, 0);
+	else
+		xf_emit(ctx, 0xa, 0);
+	xf_emit(ctx, 8, 1);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 1, 0xf);
+	xf_emit(ctx, 7, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 1);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 6, 0);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 6, 0);
+	} else {
+		xf_emit(ctx, 0xb, 0);
+	}
+}
+
+static void
+nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	if (dev_priv->chipset < 0xa0) {
+		nv50_graph_construct_xfer_tp_x1(ctx);
+		nv50_graph_construct_xfer_tp_x2(ctx);
+		nv50_graph_construct_xfer_tp_x3(ctx);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 0xf, 0);
+		else
+			xf_emit(ctx, 0x12, 0);
+		nv50_graph_construct_xfer_tp_x4(ctx);
+	} else {
+		nv50_graph_construct_xfer_tp_x3(ctx);
+		if (dev_priv->chipset < 0xaa)
+			xf_emit(ctx, 0xc, 0);
+		else
+			xf_emit(ctx, 0xa, 0);
+		nv50_graph_construct_xfer_tp_x2(ctx);
+		nv50_graph_construct_xfer_tp_x5(ctx);
+		nv50_graph_construct_xfer_tp_x4(ctx);
+		nv50_graph_construct_xfer_tp_x1(ctx);
+	}
+}
+
+static void
+nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i, mpcnt;
+	if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
+		mpcnt = 1;
+	else if (dev_priv->chipset < 0xa0 || dev_priv->chipset >= 0xa8)
+		mpcnt = 2;
+	else
+		mpcnt = 3;
+	for (i = 0; i < mpcnt; i++) {
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 0x80);
+		xf_emit(ctx, 1, 0x80007004);
+		xf_emit(ctx, 1, 0x04000400);
+		if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 1, 0xc0);
+		xf_emit(ctx, 1, 0x1000);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) {
+			xf_emit(ctx, 1, 0xe00);
+			xf_emit(ctx, 1, 0x1e00);
+		}
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 2, 0);
+		if (dev_priv->chipset == 0x50)
+			xf_emit(ctx, 2, 0x1000);
+		xf_emit(ctx, 1, 1);
+		xf_emit(ctx, 1, 0);
+		xf_emit(ctx, 1, 4);
+		xf_emit(ctx, 1, 2);
+		if (dev_priv->chipset >= 0xaa)
+			xf_emit(ctx, 0xb, 0);
+		else if (dev_priv->chipset >= 0xa0)
+			xf_emit(ctx, 0xc, 0);
+		else
+			xf_emit(ctx, 0xa, 0);
+	}
+	xf_emit(ctx, 1, 0x08100c12);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset >= 0xa0) {
+		xf_emit(ctx, 1, 0x1fe21);
+	}
+	xf_emit(ctx, 5, 0);
+	xf_emit(ctx, 4, 0xffff);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 2, 0x10001);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 0x1fe21);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 4, 0);
+	xf_emit(ctx, 1, 0x08100c12);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 8, 0);
+	xf_emit(ctx, 1, 0xfac6881);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa)
+		xf_emit(ctx, 1, 3);
+	xf_emit(ctx, 3, 0);
+	xf_emit(ctx, 1, 4);
+	xf_emit(ctx, 9, 0);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 2, 1);
+	xf_emit(ctx, 1, 2);
+	xf_emit(ctx, 3, 1);
+	xf_emit(ctx, 1, 0);
+	if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) {
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x10, 1);
+		xf_emit(ctx, 8, 2);
+		xf_emit(ctx, 0x18, 1);
+		xf_emit(ctx, 3, 0);
+	}
+	xf_emit(ctx, 1, 4);
+	if (dev_priv->chipset == 0x50)
+		xf_emit(ctx, 0x3a0, 0);
+	else if (dev_priv->chipset < 0x94)
+		xf_emit(ctx, 0x3a2, 0);
+	else if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa)
+		xf_emit(ctx, 0x39f, 0);
+	else
+		xf_emit(ctx, 0x3a3, 0);
+	xf_emit(ctx, 1, 0x11);
+	xf_emit(ctx, 1, 0);
+	xf_emit(ctx, 1, 1);
+	xf_emit(ctx, 0x2d, 0);
+}
+
+static void
+nv50_graph_construct_xfer2(struct nouveau_grctx *ctx)
+{
+	struct drm_nouveau_private *dev_priv = ctx->dev->dev_private;
+	int i;
+	uint32_t offset;
+	uint32_t units = nv_rd32 (ctx->dev, 0x1540);
+	int size = 0;
+
+	offset = (ctx->ctxvals_pos+0x3f)&~0x3f;
+
+	if (dev_priv->chipset < 0xa0) {
+		for (i = 0; i < 8; i++) {
+			ctx->ctxvals_pos = offset + i;
+			if (i == 0)
+				xf_emit(ctx, 1, 0x08100c12);
+			if (units & (1 << i))
+				nv50_graph_construct_xfer_tp2(ctx);
+			if ((ctx->ctxvals_pos-offset)/8 > size)
+				size = (ctx->ctxvals_pos-offset)/8;
+		}
+	} else {
+		/* Strand 0: TPs 0, 1 */
+		ctx->ctxvals_pos = offset;
+		xf_emit(ctx, 1, 0x08100c12);
+		if (units & (1 << 0))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 1))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 2, 3 */
+		ctx->ctxvals_pos = offset + 1;
+		if (units & (1 << 2))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 3))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 4, 5, 6 */
+		ctx->ctxvals_pos = offset + 2;
+		if (units & (1 << 4))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 5))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 6))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+
+		/* Strand 0: TPs 7, 8, 9 */
+		ctx->ctxvals_pos = offset + 3;
+		if (units & (1 << 7))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 8))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if (units & (1 << 9))
+			nv50_graph_construct_xfer_tp2(ctx);
+		if ((ctx->ctxvals_pos-offset)/8 > size)
+			size = (ctx->ctxvals_pos-offset)/8;
+	}
+	ctx->ctxvals_pos = offset + size * 8;
+	ctx->ctxvals_pos = (ctx->ctxvals_pos+0x3f)&~0x3f;
+	cp_lsr (ctx, offset);
+	cp_out (ctx, CP_SET_XFER_POINTER);
+	cp_lsr (ctx, size);
+	cp_out (ctx, CP_SEEK_2);
+	cp_out (ctx, CP_XFER_2);
+	cp_wait(ctx, XFER, BUSY);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index f0dc4e3..de1f5b0 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -390,7 +390,7 @@
 	if (gpuobj->im_backing)
 		return -EINVAL;
 
-	*sz = (*sz + (NV50_INSTMEM_PAGE_SIZE-1)) & ~(NV50_INSTMEM_PAGE_SIZE-1);
+	*sz = ALIGN(*sz, NV50_INSTMEM_PAGE_SIZE);
 	if (*sz == 0)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 1cc7b93..ed38262 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -30,6 +30,9 @@
 $(obj)/rs600_reg_safe.h: $(src)/reg_srcs/rs600 $(obj)/mkregtable
 	$(call if_changed,mkregtable)
 
+$(obj)/r600_reg_safe.h: $(src)/reg_srcs/r600 $(obj)/mkregtable
+	$(call if_changed,mkregtable)
+
 $(obj)/r100.o: $(obj)/r100_reg_safe.h $(obj)/rn50_reg_safe.h
 
 $(obj)/r200.o: $(obj)/r200_reg_safe.h
@@ -42,6 +45,8 @@
 
 $(obj)/rs600.o: $(obj)/rs600_reg_safe.h
 
+$(obj)/r600_cs.o: $(obj)/r600_reg_safe.h
+
 radeon-y := radeon_drv.o radeon_cp.o radeon_state.o radeon_mem.o \
 	radeon_irq.o r300_cmdbuf.o r600_cp.o
 # add KMS driver
@@ -54,8 +59,10 @@
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
 	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
-	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o
+	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
+	evergreen.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
+radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
 
 obj-$(CONFIG_DRM_RADEON)+= radeon.o
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 7f152f6..d75788f 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -881,8 +881,6 @@
 	uint8_t attr = U8((*ptr)++), shift;
 	uint32_t saved, dst;
 	int dptr = *ptr;
-	attr &= 0x38;
-	attr |= atom_def_dst[attr >> 3] << 6;
 	SDEBUG("   dst: ");
 	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
 	shift = atom_get_src(ctx, attr, ptr);
@@ -897,8 +895,6 @@
 	uint8_t attr = U8((*ptr)++), shift;
 	uint32_t saved, dst;
 	int dptr = *ptr;
-	attr &= 0x38;
-	attr |= atom_def_dst[attr >> 3] << 6;
 	SDEBUG("   dst: ");
 	dst = atom_get_dst(ctx, arg, attr, ptr, &saved, 1);
 	shift = atom_get_src(ctx, attr, ptr);
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index 91ad0d1..6732b5d 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006-2007 Advanced Micro Devices, Inc.
+ * Copyright 2006-2007 Advanced Micro Devices, Inc.  
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,10 +20,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-/****************************************************************************/
+
+/****************************************************************************/	
 /*Portion I: Definitions  shared between VBIOS and Driver                   */
 /****************************************************************************/
 
+
 #ifndef _ATOMBIOS_H
 #define _ATOMBIOS_H
 
@@ -40,39 +42,46 @@
 #endif
 
 #ifdef _H2INC
-#ifndef ULONG
-typedef unsigned long ULONG;
-#endif
+  #ifndef ULONG 
+    typedef unsigned long ULONG;
+  #endif
 
-#ifndef UCHAR
-typedef unsigned char UCHAR;
-#endif
+  #ifndef UCHAR
+    typedef unsigned char UCHAR;
+  #endif
 
-#ifndef USHORT
-typedef unsigned short USHORT;
+  #ifndef USHORT 
+    typedef unsigned short USHORT;
+  #endif
 #endif
-#endif
-
-#define ATOM_DAC_A            0
+      
+#define ATOM_DAC_A            0 
 #define ATOM_DAC_B            1
 #define ATOM_EXT_DAC          2
 
 #define ATOM_CRTC1            0
 #define ATOM_CRTC2            1
+#define ATOM_CRTC3            2
+#define ATOM_CRTC4            3
+#define ATOM_CRTC5            4
+#define ATOM_CRTC6            5
+#define ATOM_CRTC_INVALID     0xFF
 
 #define ATOM_DIGA             0
 #define ATOM_DIGB             1
 
 #define ATOM_PPLL1            0
 #define ATOM_PPLL2            1
+#define ATOM_DCPLL            2
+#define ATOM_PPLL_INVALID     0xFF
 
 #define ATOM_SCALER1          0
 #define ATOM_SCALER2          1
 
-#define ATOM_SCALER_DISABLE   0
-#define ATOM_SCALER_CENTER    1
-#define ATOM_SCALER_EXPANSION 2
-#define ATOM_SCALER_MULTI_EX  3
+#define ATOM_SCALER_DISABLE   0   
+#define ATOM_SCALER_CENTER    1   
+#define ATOM_SCALER_EXPANSION 2   
+#define ATOM_SCALER_MULTI_EX  3   
 
 #define ATOM_DISABLE          0
 #define ATOM_ENABLE           1
@@ -82,6 +91,7 @@
 #define ATOM_LCD_SELFTEST_START									(ATOM_DISABLE+5)
 #define ATOM_LCD_SELFTEST_STOP									(ATOM_ENABLE+5)
 #define ATOM_ENCODER_INIT			                  (ATOM_DISABLE+7)
+#define ATOM_GET_STATUS                         (ATOM_DISABLE+8)
 
 #define ATOM_BLANKING         1
 #define ATOM_BLANKING_OFF     0
@@ -114,7 +124,7 @@
 #define ATOM_DAC2_CV          ATOM_DAC1_CV
 #define ATOM_DAC2_NTSC        ATOM_DAC1_NTSC
 #define ATOM_DAC2_PAL         ATOM_DAC1_PAL
-
+ 
 #define ATOM_PM_ON            0
 #define ATOM_PM_STANDBY       1
 #define ATOM_PM_SUSPEND       2
@@ -134,6 +144,7 @@
 #define ATOM_PANEL_MISC_TEMPORAL           0x00000040
 #define ATOM_PANEL_MISC_API_ENABLED        0x00000080
 
+
 #define MEMTYPE_DDR1              "DDR1"
 #define MEMTYPE_DDR2              "DDR2"
 #define MEMTYPE_DDR3              "DDR3"
@@ -145,19 +156,19 @@
 
 /* Maximum size of that FireGL flag string */
 
-#define ATOM_FIREGL_FLAG_STRING     "FGL"	/* Flag used to enable FireGL Support */
-#define ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING  3	/* sizeof( ATOM_FIREGL_FLAG_STRING ) */
+#define ATOM_FIREGL_FLAG_STRING     "FGL"             //Flag used to enable FireGL Support
+#define ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING  3        //sizeof( ATOM_FIREGL_FLAG_STRING )
 
-#define ATOM_FAKE_DESKTOP_STRING    "DSK"	/* Flag used to enable mobile ASIC on Desktop */
-#define ATOM_MAX_SIZE_OF_FAKE_DESKTOP_STRING  ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING
+#define ATOM_FAKE_DESKTOP_STRING    "DSK"             //Flag used to enable mobile ASIC on Desktop
+#define ATOM_MAX_SIZE_OF_FAKE_DESKTOP_STRING  ATOM_MAX_SIZE_OF_FIREGL_FLAG_STRING 
 
-#define ATOM_M54T_FLAG_STRING       "M54T"	/* Flag used to enable M54T Support */
-#define ATOM_MAX_SIZE_OF_M54T_FLAG_STRING    4	/* sizeof( ATOM_M54T_FLAG_STRING ) */
+#define ATOM_M54T_FLAG_STRING       "M54T"            //Flag used to enable M54T Support
+#define ATOM_MAX_SIZE_OF_M54T_FLAG_STRING    4        //sizeof( ATOM_M54T_FLAG_STRING )
 
 #define HW_ASSISTED_I2C_STATUS_FAILURE          2
 #define HW_ASSISTED_I2C_STATUS_SUCCESS          1
 
-#pragma pack(1)			/* BIOS data must use byte aligment */
+#pragma pack(1)                                       /* BIOS data must use byte aligment */
 
 /*  Define offset to location of ROM header. */
 
@@ -165,367 +176,410 @@
 #define OFFSET_TO_ATOM_ROM_IMAGE_SIZE				    0x00000002L
 
 #define OFFSET_TO_ATOMBIOS_ASIC_BUS_MEM_TYPE    0x94
-#define MAXSIZE_OF_ATOMBIOS_ASIC_BUS_MEM_TYPE   20	/* including the terminator 0x0! */
+#define MAXSIZE_OF_ATOMBIOS_ASIC_BUS_MEM_TYPE   20    /* including the terminator 0x0! */
 #define	OFFSET_TO_GET_ATOMBIOS_STRINGS_NUMBER		0x002f
 #define	OFFSET_TO_GET_ATOMBIOS_STRINGS_START		0x006e
 
 /* Common header for all ROM Data tables.
-  Every table pointed  _ATOM_MASTER_DATA_TABLE has this common header.
+  Every table pointed  _ATOM_MASTER_DATA_TABLE has this common header. 
   And the pointer actually points to this header. */
 
-typedef struct _ATOM_COMMON_TABLE_HEADER {
-	USHORT usStructureSize;
-	UCHAR ucTableFormatRevision;	/*Change it when the Parser is not backward compatible */
-	UCHAR ucTableContentRevision;	/*Change it only when the table needs to change but the firmware */
-	/*Image can't be updated, while Driver needs to carry the new table! */
-} ATOM_COMMON_TABLE_HEADER;
+typedef struct _ATOM_COMMON_TABLE_HEADER
+{
+  USHORT usStructureSize;
+  UCHAR  ucTableFormatRevision;   /*Change it when the Parser is not backward compatible */
+  UCHAR  ucTableContentRevision;  /*Change it only when the table needs to change but the firmware */
+                                  /*Image can't be updated, while Driver needs to carry the new table! */
+}ATOM_COMMON_TABLE_HEADER;
 
-typedef struct _ATOM_ROM_HEADER {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR uaFirmWareSignature[4];	/*Signature to distinguish between Atombios and non-atombios,
-					   atombios should init it as "ATOM", don't change the position */
-	USHORT usBiosRuntimeSegmentAddress;
-	USHORT usProtectedModeInfoOffset;
-	USHORT usConfigFilenameOffset;
-	USHORT usCRC_BlockOffset;
-	USHORT usBIOS_BootupMessageOffset;
-	USHORT usInt10Offset;
-	USHORT usPciBusDevInitCode;
-	USHORT usIoBaseAddress;
-	USHORT usSubsystemVendorID;
-	USHORT usSubsystemID;
-	USHORT usPCI_InfoOffset;
-	USHORT usMasterCommandTableOffset;	/*Offset for SW to get all command table offsets, Don't change the position */
-	USHORT usMasterDataTableOffset;	/*Offset for SW to get all data table offsets, Don't change the position */
-	UCHAR ucExtendedFunctionCode;
-	UCHAR ucReserved;
-} ATOM_ROM_HEADER;
+typedef struct _ATOM_ROM_HEADER
+{
+  ATOM_COMMON_TABLE_HEADER		sHeader;
+  UCHAR	 uaFirmWareSignature[4];    /*Signature to distinguish between Atombios and non-atombios, 
+                                      atombios should init it as "ATOM", don't change the position */
+  USHORT usBiosRuntimeSegmentAddress;
+  USHORT usProtectedModeInfoOffset;
+  USHORT usConfigFilenameOffset;
+  USHORT usCRC_BlockOffset;
+  USHORT usBIOS_BootupMessageOffset;
+  USHORT usInt10Offset;
+  USHORT usPciBusDevInitCode;
+  USHORT usIoBaseAddress;
+  USHORT usSubsystemVendorID;
+  USHORT usSubsystemID;
+  USHORT usPCI_InfoOffset; 
+  USHORT usMasterCommandTableOffset; /*Offset for SW to get all command table offsets, Don't change the position */
+  USHORT usMasterDataTableOffset;   /*Offset for SW to get all data table offsets, Don't change the position */
+  UCHAR  ucExtendedFunctionCode;
+  UCHAR  ucReserved;
+}ATOM_ROM_HEADER;
 
 /*==============================Command Table Portion==================================== */
 
 #ifdef	UEFI_BUILD
-#define	UTEMP	USHORT
-#define	USHORT	void*
+	#define	UTEMP	USHORT
+	#define	USHORT	void*
 #endif
 
-typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES {
-	USHORT ASIC_Init;	/* Function Table, used by various SW components,latest version 1.1 */
-	USHORT GetDisplaySurfaceSize;	/* Atomic Table,  Used by Bios when enabling HW ICON */
-	USHORT ASIC_RegistersInit;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
-	USHORT VRAM_BlockVenderDetection;	/* Atomic Table,  used only by Bios */
-	USHORT DIGxEncoderControl;	/* Only used by Bios */
-	USHORT MemoryControllerInit;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
-	USHORT EnableCRTCMemReq;	/* Function Table,directly used by various SW components,latest version 2.1 */
-	USHORT MemoryParamAdjust;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock if needed */
-	USHORT DVOEncoderControl;	/* Function Table,directly used by various SW components,latest version 1.2 */
-	USHORT GPIOPinControl;	/* Atomic Table,  only used by Bios */
-	USHORT SetEngineClock;	/*Function Table,directly used by various SW components,latest version 1.1 */
-	USHORT SetMemoryClock;	/* Function Table,directly used by various SW components,latest version 1.1 */
-	USHORT SetPixelClock;	/*Function Table,directly used by various SW components,latest version 1.2 */
-	USHORT DynamicClockGating;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
-	USHORT ResetMemoryDLL;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT ResetMemoryDevice;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT MemoryPLLInit;
-	USHORT AdjustDisplayPll;	/* only used by Bios */
-	USHORT AdjustMemoryController;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT EnableASIC_StaticPwrMgt;	/* Atomic Table,  only used by Bios */
-	USHORT ASIC_StaticPwrMgtStatusChange;	/* Obsolete, only used by Bios */
-	USHORT DAC_LoadDetection;	/* Atomic Table,  directly used by various SW components,latest version 1.2 */
-	USHORT LVTMAEncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.3 */
-	USHORT LCD1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT DAC1EncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT DAC2EncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT DVOOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT CV1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT GetConditionalGoldenSetting;	/* only used by Bios */
-	USHORT TVEncoderControl;	/* Function Table,directly used by various SW components,latest version 1.1 */
-	USHORT TMDSAEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.3 */
-	USHORT LVDSEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 1.3 */
-	USHORT TV1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT EnableScaler;	/* Atomic Table,  used only by Bios */
-	USHORT BlankCRTC;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT EnableCRTC;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT GetPixelClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT EnableVGA_Render;	/* Function Table,directly used by various SW components,latest version 1.1 */
-	USHORT EnableVGA_Access;	/* Obsolete ,     only used by Bios */
-	USHORT SetCRTC_Timing;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT SetCRTC_OverScan;	/* Atomic Table,  used by various SW components,latest version 1.1 */
-	USHORT SetCRTC_Replication;	/* Atomic Table,  used only by Bios */
-	USHORT SelectCRTC_Source;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT EnableGraphSurfaces;	/* Atomic Table,  used only by Bios */
-	USHORT UpdateCRTC_DoubleBufferRegisters;
-	USHORT LUT_AutoFill;	/* Atomic Table,  only used by Bios */
-	USHORT EnableHW_IconCursor;	/* Atomic Table,  only used by Bios */
-	USHORT GetMemoryClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT GetEngineClock;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT SetCRTC_UsingDTDTiming;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT ExternalEncoderControl;	/* Atomic Table,  directly used by various SW components,latest version 2.1 */
-	USHORT LVTMAOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT VRAM_BlockDetectionByStrap;	/* Atomic Table,  used only by Bios */
-	USHORT MemoryCleanUp;	/* Atomic Table,  only used by Bios */
-	USHORT ProcessI2cChannelTransaction;	/* Function Table,only used by Bios */
-	USHORT WriteOneByteToHWAssistedI2C;	/* Function Table,indirectly used by various SW components */
-	USHORT ReadHWAssistedI2CStatus;	/* Atomic Table,  indirectly used by various SW components */
-	USHORT SpeedFanControl;	/* Function Table,indirectly used by various SW components,called from ASIC_Init */
-	USHORT PowerConnectorDetection;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT MC_Synchronization;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT ComputeMemoryEnginePLL;	/* Atomic Table,  indirectly used by various SW components,called from SetMemory/EngineClock */
-	USHORT MemoryRefreshConversion;	/* Atomic Table,  indirectly used by various SW components,called from SetMemory or SetEngineClock */
-	USHORT VRAM_GetCurrentInfoBlock;	/* Atomic Table,  used only by Bios */
-	USHORT DynamicMemorySettings;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT MemoryTraining;	/* Atomic Table,  used only by Bios */
-	USHORT EnableSpreadSpectrumOnPPLL;	/* Atomic Table,  directly used by various SW components,latest version 1.2 */
-	USHORT TMDSAOutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT SetVoltage;	/* Function Table,directly and/or indirectly used by various SW components,latest version 1.1 */
-	USHORT DAC1OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT DAC2OutputControl;	/* Atomic Table,  directly used by various SW components,latest version 1.1 */
-	USHORT SetupHWAssistedI2CStatus;	/* Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C" */
-	USHORT ClockSource;	/* Atomic Table,  indirectly used by various SW components,called from ASIC_Init */
-	USHORT MemoryDeviceInit;	/* Atomic Table,  indirectly used by various SW components,called from SetMemoryClock */
-	USHORT EnableYUV;	/* Atomic Table,  indirectly used by various SW components,called from EnableVGARender */
-	USHORT DIG1EncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
-	USHORT DIG2EncoderControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
-	USHORT DIG1TransmitterControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
-	USHORT DIG2TransmitterControl;	/* Atomic Table,directly used by various SW components,latest version 1.1 */
-	USHORT ProcessAuxChannelTransaction;	/* Function Table,only used by Bios */
-	USHORT DPEncoderService;	/* Function Table,only used by Bios */
-} ATOM_MASTER_LIST_OF_COMMAND_TABLES;
+typedef struct _ATOM_MASTER_LIST_OF_COMMAND_TABLES{
+  USHORT ASIC_Init;                              //Function Table, used by various SW components,latest version 1.1
+  USHORT GetDisplaySurfaceSize;                  //Atomic Table,  Used by Bios when enabling HW ICON
+  USHORT ASIC_RegistersInit;                     //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
+  USHORT VRAM_BlockVenderDetection;              //Atomic Table,  used only by Bios
+  USHORT DIGxEncoderControl;										 //Only used by Bios
+  USHORT MemoryControllerInit;                   //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
+  USHORT EnableCRTCMemReq;                       //Function Table,directly used by various SW components,latest version 2.1
+  USHORT MemoryParamAdjust; 										 //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock if needed
+  USHORT DVOEncoderControl;                      //Function Table,directly used by various SW components,latest version 1.2
+  USHORT GPIOPinControl;												 //Atomic Table,  only used by Bios
+  USHORT SetEngineClock;                         //Function Table,directly used by various SW components,latest version 1.1
+  USHORT SetMemoryClock;                         //Function Table,directly used by various SW components,latest version 1.1
+  USHORT SetPixelClock;                          //Function Table,directly used by various SW components,latest version 1.2  
+  USHORT DynamicClockGating;                     //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
+  USHORT ResetMemoryDLL;                         //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
+  USHORT ResetMemoryDevice;                      //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
+  USHORT MemoryPLLInit;
+  USHORT AdjustDisplayPll;												//only used by Bios
+  USHORT AdjustMemoryController;                 //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock                
+  USHORT EnableASIC_StaticPwrMgt;                //Atomic Table,  only used by Bios
+  USHORT ASIC_StaticPwrMgtStatusChange;          //Obsolete ,     only used by Bios   
+  USHORT DAC_LoadDetection;                      //Atomic Table,  directly used by various SW components,latest version 1.2  
+  USHORT LVTMAEncoderControl;                    //Atomic Table,directly used by various SW components,latest version 1.3
+  USHORT LCD1OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT DAC1EncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1  
+  USHORT DAC2EncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT DVOOutputControl;                       //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT CV1OutputControl;                       //Atomic Table,  Atomic Table,  Obsolete from Ry6xx, use DAC2 Output instead 
+  USHORT GetConditionalGoldenSetting;            //only used by Bios
+  USHORT TVEncoderControl;                       //Function Table,directly used by various SW components,latest version 1.1
+  USHORT TMDSAEncoderControl;                    //Atomic Table,  directly used by various SW components,latest version 1.3
+  USHORT LVDSEncoderControl;                     //Atomic Table,  directly used by various SW components,latest version 1.3
+  USHORT TV1OutputControl;                       //Atomic Table,  Obsolete from Ry6xx, use DAC2 Output instead
+  USHORT EnableScaler;                           //Atomic Table,  used only by Bios
+  USHORT BlankCRTC;                              //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT EnableCRTC;                             //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT GetPixelClock;                          //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT EnableVGA_Render;                       //Function Table,directly used by various SW components,latest version 1.1
+  USHORT GetSCLKOverMCLKRatio;                   //Atomic Table,  only used by Bios
+  USHORT SetCRTC_Timing;                         //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT SetCRTC_OverScan;                       //Atomic Table,  used by various SW components,latest version 1.1 
+  USHORT SetCRTC_Replication;                    //Atomic Table,  used only by Bios
+  USHORT SelectCRTC_Source;                      //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT EnableGraphSurfaces;                    //Atomic Table,  used only by Bios
+  USHORT UpdateCRTC_DoubleBufferRegisters;
+  USHORT LUT_AutoFill;                           //Atomic Table,  only used by Bios
+  USHORT EnableHW_IconCursor;                    //Atomic Table,  only used by Bios
+  USHORT GetMemoryClock;                         //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT GetEngineClock;                         //Atomic Table,  directly used by various SW components,latest version 1.1 
+  USHORT SetCRTC_UsingDTDTiming;                 //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT ExternalEncoderControl;                 //Atomic Table,  directly used by various SW components,latest version 2.1
+  USHORT LVTMAOutputControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT VRAM_BlockDetectionByStrap;             //Atomic Table,  used only by Bios
+  USHORT MemoryCleanUp;                          //Atomic Table,  only used by Bios    
+  USHORT ProcessI2cChannelTransaction;           //Function Table,only used by Bios
+  USHORT WriteOneByteToHWAssistedI2C;            //Function Table,indirectly used by various SW components 
+  USHORT ReadHWAssistedI2CStatus;                //Atomic Table,  indirectly used by various SW components
+  USHORT SpeedFanControl;                        //Function Table,indirectly used by various SW components,called from ASIC_Init
+  USHORT PowerConnectorDetection;                //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT MC_Synchronization;                     //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
+  USHORT ComputeMemoryEnginePLL;                 //Atomic Table,  indirectly used by various SW components,called from SetMemory/EngineClock
+  USHORT MemoryRefreshConversion;                //Atomic Table,  indirectly used by various SW components,called from SetMemory or SetEngineClock
+  USHORT VRAM_GetCurrentInfoBlock;               //Atomic Table,  used only by Bios
+  USHORT DynamicMemorySettings;                  //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
+  USHORT MemoryTraining;                         //Atomic Table,  used only by Bios
+  USHORT EnableSpreadSpectrumOnPPLL;             //Atomic Table,  directly used by various SW components,latest version 1.2
+  USHORT TMDSAOutputControl;                     //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT SetVoltage;                             //Function Table,directly and/or indirectly used by various SW components,latest version 1.1
+  USHORT DAC1OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT DAC2OutputControl;                      //Atomic Table,  directly used by various SW components,latest version 1.1
+  USHORT SetupHWAssistedI2CStatus;               //Function Table,only used by Bios, obsolete soon.Switch to use "ReadEDIDFromHWAssistedI2C"
+  USHORT ClockSource;                            //Atomic Table,  indirectly used by various SW components,called from ASIC_Init
+  USHORT MemoryDeviceInit;                       //Atomic Table,  indirectly used by various SW components,called from SetMemoryClock
+  USHORT EnableYUV;                              //Atomic Table,  indirectly used by various SW components,called from EnableVGARender
+  USHORT DIG1EncoderControl;                     //Atomic Table,directly used by various SW components,latest version 1.1
+  USHORT DIG2EncoderControl;                     //Atomic Table,directly used by various SW components,latest version 1.1
+  USHORT DIG1TransmitterControl;                 //Atomic Table,directly used by various SW components,latest version 1.1
+  USHORT DIG2TransmitterControl;	               //Atomic Table,directly used by various SW components,latest version 1.1 
+  USHORT ProcessAuxChannelTransaction;					 //Function Table,only used by Bios
+  USHORT DPEncoderService;											 //Function Table,only used by Bios
+}ATOM_MASTER_LIST_OF_COMMAND_TABLES;   
 
-/*  For backward compatible */
+// For backward compatible 
 #define ReadEDIDFromHWAssistedI2C                ProcessI2cChannelTransaction
 #define UNIPHYTransmitterControl						     DIG1TransmitterControl
 #define LVTMATransmitterControl							     DIG2TransmitterControl
 #define SetCRTC_DPM_State                        GetConditionalGoldenSetting
 #define SetUniphyInstance                        ASIC_StaticPwrMgtStatusChange
+#define HPDInterruptService                      ReadHWAssistedI2CStatus
+#define EnableVGA_Access                         GetSCLKOverMCLKRatio
 
-typedef struct _ATOM_MASTER_COMMAND_TABLE {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_MASTER_LIST_OF_COMMAND_TABLES ListOfCommandTables;
-} ATOM_MASTER_COMMAND_TABLE;
+typedef struct _ATOM_MASTER_COMMAND_TABLE
+{
+  ATOM_COMMON_TABLE_HEADER           sHeader;
+  ATOM_MASTER_LIST_OF_COMMAND_TABLES ListOfCommandTables;
+}ATOM_MASTER_COMMAND_TABLE;
 
-/****************************************************************************/
-/*  Structures used in every command table */
-/****************************************************************************/
-typedef struct _ATOM_TABLE_ATTRIBUTE {
+/****************************************************************************/	
+// Structures used in every command table
+/****************************************************************************/	
+typedef struct _ATOM_TABLE_ATTRIBUTE
+{
 #if ATOM_BIG_ENDIAN
-	USHORT UpdatedByUtility:1;	/* [15]=Table updated by utility flag */
-	USHORT PS_SizeInBytes:7;	/* [14:8]=Size of parameter space in Bytes (multiple of a dword), */
-	USHORT WS_SizeInBytes:8;	/* [7:0]=Size of workspace in Bytes (in multiple of a dword), */
+  USHORT  UpdatedByUtility:1;         //[15]=Table updated by utility flag
+  USHORT  PS_SizeInBytes:7;           //[14:8]=Size of parameter space in Bytes (multiple of a dword), 
+  USHORT  WS_SizeInBytes:8;           //[7:0]=Size of workspace in Bytes (in multiple of a dword), 
 #else
-	USHORT WS_SizeInBytes:8;	/* [7:0]=Size of workspace in Bytes (in multiple of a dword), */
-	USHORT PS_SizeInBytes:7;	/* [14:8]=Size of parameter space in Bytes (multiple of a dword), */
-	USHORT UpdatedByUtility:1;	/* [15]=Table updated by utility flag */
+  USHORT  WS_SizeInBytes:8;           //[7:0]=Size of workspace in Bytes (in multiple of a dword), 
+  USHORT  PS_SizeInBytes:7;           //[14:8]=Size of parameter space in Bytes (multiple of a dword), 
+  USHORT  UpdatedByUtility:1;         //[15]=Table updated by utility flag
 #endif
-} ATOM_TABLE_ATTRIBUTE;
+}ATOM_TABLE_ATTRIBUTE;
 
-typedef union _ATOM_TABLE_ATTRIBUTE_ACCESS {
-	ATOM_TABLE_ATTRIBUTE sbfAccess;
-	USHORT susAccess;
-} ATOM_TABLE_ATTRIBUTE_ACCESS;
+typedef union _ATOM_TABLE_ATTRIBUTE_ACCESS
+{
+  ATOM_TABLE_ATTRIBUTE sbfAccess;
+  USHORT               susAccess;
+}ATOM_TABLE_ATTRIBUTE_ACCESS;
 
-/****************************************************************************/
-/*  Common header for all command tables. */
-/*  Every table pointed by _ATOM_MASTER_COMMAND_TABLE has this common header. */
-/*  And the pointer actually points to this header. */
-/****************************************************************************/
-typedef struct _ATOM_COMMON_ROM_COMMAND_TABLE_HEADER {
-	ATOM_COMMON_TABLE_HEADER CommonHeader;
-	ATOM_TABLE_ATTRIBUTE TableAttribute;
-} ATOM_COMMON_ROM_COMMAND_TABLE_HEADER;
+/****************************************************************************/	
+// Common header for all command tables.
+// Every table pointed by _ATOM_MASTER_COMMAND_TABLE has this common header. 
+// And the pointer actually points to this header.
+/****************************************************************************/	
+typedef struct _ATOM_COMMON_ROM_COMMAND_TABLE_HEADER
+{
+  ATOM_COMMON_TABLE_HEADER CommonHeader;
+  ATOM_TABLE_ATTRIBUTE     TableAttribute;	
+}ATOM_COMMON_ROM_COMMAND_TABLE_HEADER;
 
-/****************************************************************************/
-/*  Structures used by ComputeMemoryEnginePLLTable */
-/****************************************************************************/
+/****************************************************************************/	
+// Structures used by ComputeMemoryEnginePLLTable
+/****************************************************************************/	
 #define COMPUTE_MEMORY_PLL_PARAM        1
 #define COMPUTE_ENGINE_PLL_PARAM        2
 
-typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS {
-	ULONG ulClock;		/* When returen, it's the re-calculated clock based on given Fb_div Post_Div and ref_div */
-	UCHAR ucAction;		/* 0:reserved //1:Memory //2:Engine */
-	UCHAR ucReserved;	/* may expand to return larger Fbdiv later */
-	UCHAR ucFbDiv;		/* return value */
-	UCHAR ucPostDiv;	/* return value */
-} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS;
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS
+{
+  ULONG   ulClock;        //When returen, it's the re-calculated clock based on given Fb_div Post_Div and ref_div
+  UCHAR   ucAction;       //0:reserved //1:Memory //2:Engine  
+  UCHAR   ucReserved;     //may expand to return larger Fbdiv later
+  UCHAR   ucFbDiv;        //return value
+  UCHAR   ucPostDiv;      //return value
+}COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS;
 
-typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 {
-	ULONG ulClock;		/* When return, [23:0] return real clock */
-	UCHAR ucAction;		/* 0:reserved;COMPUTE_MEMORY_PLL_PARAM:Memory;COMPUTE_ENGINE_PLL_PARAM:Engine. it return ref_div to be written to register */
-	USHORT usFbDiv;		/* return Feedback value to be written to register */
-	UCHAR ucPostDiv;	/* return post div to be written to register */
-} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2;
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2
+{
+  ULONG   ulClock;        //When return, [23:0] return real clock 
+  UCHAR   ucAction;       //0:reserved;COMPUTE_MEMORY_PLL_PARAM:Memory;COMPUTE_ENGINE_PLL_PARAM:Engine. it return ref_div to be written to register
+  USHORT  usFbDiv;		    //return Feedback value to be written to register
+  UCHAR   ucPostDiv;      //return post div to be written to register
+}COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2;
 #define COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION   COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS
 
-#define SET_CLOCK_FREQ_MASK                     0x00FFFFFF	/* Clock change tables only take bit [23:0] as the requested clock value */
-#define USE_NON_BUS_CLOCK_MASK                  0x01000000	/* Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa) */
-#define USE_MEMORY_SELF_REFRESH_MASK            0x02000000	/* Only applicable to memory clock change, when set, using memory self refresh during clock transition */
-#define SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04000000	/* Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change */
-#define FIRST_TIME_CHANGE_CLOCK									0x08000000	/* Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup */
-#define SKIP_SW_PROGRAM_PLL											0x10000000	/* Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL */
+
+#define SET_CLOCK_FREQ_MASK                     0x00FFFFFF  //Clock change tables only take bit [23:0] as the requested clock value
+#define USE_NON_BUS_CLOCK_MASK                  0x01000000  //Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa)
+#define USE_MEMORY_SELF_REFRESH_MASK            0x02000000	//Only applicable to memory clock change, when set, using memory self refresh during clock transition
+#define SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04000000  //Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change
+#define FIRST_TIME_CHANGE_CLOCK									0x08000000	//Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup
+#define SKIP_SW_PROGRAM_PLL											0x10000000	//Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL
 #define USE_SS_ENABLED_PIXEL_CLOCK  USE_NON_BUS_CLOCK_MASK
 
-#define b3USE_NON_BUS_CLOCK_MASK                  0x01	/* Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa) */
-#define b3USE_MEMORY_SELF_REFRESH                 0x02	/* Only applicable to memory clock change, when set, using memory self refresh during clock transition */
-#define b3SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04	/* Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change */
-#define b3FIRST_TIME_CHANGE_CLOCK									0x08	/* Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup */
-#define b3SKIP_SW_PROGRAM_PLL											0x10	/* Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL */
+#define b3USE_NON_BUS_CLOCK_MASK                  0x01       //Applicable to both memory and engine clock change, when set, it uses another clock as the temporary clock (engine uses memory and vice versa)
+#define b3USE_MEMORY_SELF_REFRESH                 0x02	     //Only applicable to memory clock change, when set, using memory self refresh during clock transition
+#define b3SKIP_INTERNAL_MEMORY_PARAMETER_CHANGE   0x04       //Only applicable to memory clock change, when set, the table will skip predefined internal memory parameter change
+#define b3FIRST_TIME_CHANGE_CLOCK									0x08       //Applicable to both memory and engine clock change,when set, it means this is 1st time to change clock after ASIC bootup
+#define b3SKIP_SW_PROGRAM_PLL											0x10			 //Applicable to both memory and engine clock change, when set, it means the table will not program SPLL/MPLL
 
-typedef struct _ATOM_COMPUTE_CLOCK_FREQ {
+typedef struct _ATOM_COMPUTE_CLOCK_FREQ
+{
 #if ATOM_BIG_ENDIAN
-	ULONG ulComputeClockFlag:8;	/*  =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM */
-	ULONG ulClockFreq:24;	/*  in unit of 10kHz */
+  ULONG ulComputeClockFlag:8;                 // =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM
+  ULONG ulClockFreq:24;                       // in unit of 10kHz
 #else
-	ULONG ulClockFreq:24;	/*  in unit of 10kHz */
-	ULONG ulComputeClockFlag:8;	/*  =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM */
+  ULONG ulClockFreq:24;                       // in unit of 10kHz
+  ULONG ulComputeClockFlag:8;                 // =1: COMPUTE_MEMORY_PLL_PARAM, =2: COMPUTE_ENGINE_PLL_PARAM
 #endif
-} ATOM_COMPUTE_CLOCK_FREQ;
+}ATOM_COMPUTE_CLOCK_FREQ;
 
-typedef struct _ATOM_S_MPLL_FB_DIVIDER {
-	USHORT usFbDivFrac;
-	USHORT usFbDiv;
-} ATOM_S_MPLL_FB_DIVIDER;
+typedef struct _ATOM_S_MPLL_FB_DIVIDER
+{
+  USHORT usFbDivFrac;  
+  USHORT usFbDiv;  
+}ATOM_S_MPLL_FB_DIVIDER;
 
-typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 {
-	union {
-		ATOM_COMPUTE_CLOCK_FREQ ulClock;	/* Input Parameter */
-		ATOM_S_MPLL_FB_DIVIDER ulFbDiv;	/* Output Parameter */
-	};
-	UCHAR ucRefDiv;		/* Output Parameter */
-	UCHAR ucPostDiv;	/* Output Parameter */
-	UCHAR ucCntlFlag;	/* Output Parameter */
-	UCHAR ucReserved;
-} COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3;
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3
+{
+  union
+  {
+    ATOM_COMPUTE_CLOCK_FREQ  ulClock;         //Input Parameter
+    ATOM_S_MPLL_FB_DIVIDER   ulFbDiv;         //Output Parameter
+  };
+  UCHAR   ucRefDiv;                           //Output Parameter      
+  UCHAR   ucPostDiv;                          //Output Parameter      
+  UCHAR   ucCntlFlag;                         //Output Parameter      
+  UCHAR   ucReserved;
+}COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3;
 
-/*  ucCntlFlag */
+// ucCntlFlag
 #define ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN          1
 #define ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE            2
 #define ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE         4
+#define ATOM_PLL_CNTL_FLAG_SPLL_ISPARE_9						8
 
-typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER {
-	ATOM_COMPUTE_CLOCK_FREQ ulClock;
-	ULONG ulReserved[2];
-} DYNAMICE_MEMORY_SETTINGS_PARAMETER;
 
-typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER {
-	ATOM_COMPUTE_CLOCK_FREQ ulClock;
-	ULONG ulMemoryClock;
-	ULONG ulReserved;
-} DYNAMICE_ENGINE_SETTINGS_PARAMETER;
+// V4 are only used for APU which PLL outside GPU
+typedef struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4
+{
+#if ATOM_BIG_ENDIAN
+  ULONG  ucPostDiv;          //return parameter: post divider which is used to program to register directly
+  ULONG  ulClock:24;         //Input= target clock, output = actual clock 
+#else
+  ULONG  ulClock:24;         //Input= target clock, output = actual clock 
+  ULONG  ucPostDiv;          //return parameter: post divider which is used to program to register directly
+#endif
+}COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4;
 
-/****************************************************************************/
-/*  Structures used by SetEngineClockTable */
-/****************************************************************************/
-typedef struct _SET_ENGINE_CLOCK_PARAMETERS {
-	ULONG ulTargetEngineClock;	/* In 10Khz unit */
-} SET_ENGINE_CLOCK_PARAMETERS;
+typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
+{
+  ATOM_COMPUTE_CLOCK_FREQ ulClock;
+  ULONG ulReserved[2];
+}DYNAMICE_MEMORY_SETTINGS_PARAMETER;
 
-typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION {
-	ULONG ulTargetEngineClock;	/* In 10Khz unit */
-	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
-} SET_ENGINE_CLOCK_PS_ALLOCATION;
+typedef struct _DYNAMICE_ENGINE_SETTINGS_PARAMETER
+{
+  ATOM_COMPUTE_CLOCK_FREQ ulClock;
+  ULONG ulMemoryClock;
+  ULONG ulReserved;
+}DYNAMICE_ENGINE_SETTINGS_PARAMETER;
 
-/****************************************************************************/
-/*  Structures used by SetMemoryClockTable */
-/****************************************************************************/
-typedef struct _SET_MEMORY_CLOCK_PARAMETERS {
-	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
-} SET_MEMORY_CLOCK_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SetEngineClockTable
+/****************************************************************************/	
+typedef struct _SET_ENGINE_CLOCK_PARAMETERS
+{
+  ULONG ulTargetEngineClock;          //In 10Khz unit
+}SET_ENGINE_CLOCK_PARAMETERS;
 
-typedef struct _SET_MEMORY_CLOCK_PS_ALLOCATION {
-	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
-	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
-} SET_MEMORY_CLOCK_PS_ALLOCATION;
+typedef struct _SET_ENGINE_CLOCK_PS_ALLOCATION
+{
+  ULONG ulTargetEngineClock;          //In 10Khz unit
+  COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
+}SET_ENGINE_CLOCK_PS_ALLOCATION;
 
-/****************************************************************************/
-/*  Structures used by ASIC_Init.ctb */
-/****************************************************************************/
-typedef struct _ASIC_INIT_PARAMETERS {
-	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
-	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
-} ASIC_INIT_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SetMemoryClockTable
+/****************************************************************************/	
+typedef struct _SET_MEMORY_CLOCK_PARAMETERS
+{
+  ULONG ulTargetMemoryClock;          //In 10Khz unit
+}SET_MEMORY_CLOCK_PARAMETERS;
 
-typedef struct _ASIC_INIT_PS_ALLOCATION {
-	ASIC_INIT_PARAMETERS sASICInitClocks;
-	SET_ENGINE_CLOCK_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this structure */
-} ASIC_INIT_PS_ALLOCATION;
+typedef struct _SET_MEMORY_CLOCK_PS_ALLOCATION
+{
+  ULONG ulTargetMemoryClock;          //In 10Khz unit
+  COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_PS_ALLOCATION sReserved;
+}SET_MEMORY_CLOCK_PS_ALLOCATION;
 
-/****************************************************************************/
-/*  Structure used by DynamicClockGatingTable.ctb */
-/****************************************************************************/
-typedef struct _DYNAMIC_CLOCK_GATING_PARAMETERS {
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucPadding[3];
-} DYNAMIC_CLOCK_GATING_PARAMETERS;
+/****************************************************************************/	
+// Structures used by ASIC_Init.ctb
+/****************************************************************************/	
+typedef struct _ASIC_INIT_PARAMETERS
+{
+  ULONG ulDefaultEngineClock;         //In 10Khz unit
+  ULONG ulDefaultMemoryClock;         //In 10Khz unit
+}ASIC_INIT_PARAMETERS;
+
+typedef struct _ASIC_INIT_PS_ALLOCATION
+{
+  ASIC_INIT_PARAMETERS sASICInitClocks;
+  SET_ENGINE_CLOCK_PS_ALLOCATION sReserved; //Caller doesn't need to init this structure
+}ASIC_INIT_PS_ALLOCATION;
+
+/****************************************************************************/	
+// Structure used by DynamicClockGatingTable.ctb
+/****************************************************************************/	
+typedef struct _DYNAMIC_CLOCK_GATING_PARAMETERS 
+{
+  UCHAR ucEnable;                     // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR ucPadding[3];
+}DYNAMIC_CLOCK_GATING_PARAMETERS;
 #define  DYNAMIC_CLOCK_GATING_PS_ALLOCATION  DYNAMIC_CLOCK_GATING_PARAMETERS
 
-/****************************************************************************/
-/*  Structure used by EnableASIC_StaticPwrMgtTable.ctb */
-/****************************************************************************/
-typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS {
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucPadding[3];
-} ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS;
+/****************************************************************************/	
+// Structure used by EnableASIC_StaticPwrMgtTable.ctb
+/****************************************************************************/	
+typedef struct _ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS
+{
+  UCHAR ucEnable;                     // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR ucPadding[3];
+}ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS;
 #define ENABLE_ASIC_STATIC_PWR_MGT_PS_ALLOCATION  ENABLE_ASIC_STATIC_PWR_MGT_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by DAC_LoadDetectionTable.ctb */
-/****************************************************************************/
-typedef struct _DAC_LOAD_DETECTION_PARAMETERS {
-	USHORT usDeviceID;	/* {ATOM_DEVICE_CRTx_SUPPORT,ATOM_DEVICE_TVx_SUPPORT,ATOM_DEVICE_CVx_SUPPORT} */
-	UCHAR ucDacType;	/* {ATOM_DAC_A,ATOM_DAC_B, ATOM_EXT_DAC} */
-	UCHAR ucMisc;		/* Valid only when table revision =1.3 and above */
-} DAC_LOAD_DETECTION_PARAMETERS;
+/****************************************************************************/	
+// Structures used by DAC_LoadDetectionTable.ctb
+/****************************************************************************/	
+typedef struct _DAC_LOAD_DETECTION_PARAMETERS
+{
+  USHORT usDeviceID;                  //{ATOM_DEVICE_CRTx_SUPPORT,ATOM_DEVICE_TVx_SUPPORT,ATOM_DEVICE_CVx_SUPPORT}
+  UCHAR  ucDacType;                   //{ATOM_DAC_A,ATOM_DAC_B, ATOM_EXT_DAC}
+  UCHAR  ucMisc;											//Valid only when table revision =1.3 and above
+}DAC_LOAD_DETECTION_PARAMETERS;
 
-/*  DAC_LOAD_DETECTION_PARAMETERS.ucMisc */
+// DAC_LOAD_DETECTION_PARAMETERS.ucMisc
 #define DAC_LOAD_MISC_YPrPb						0x01
 
-typedef struct _DAC_LOAD_DETECTION_PS_ALLOCATION {
-	DAC_LOAD_DETECTION_PARAMETERS sDacload;
-	ULONG Reserved[2];	/*  Don't set this one, allocation for EXT DAC */
-} DAC_LOAD_DETECTION_PS_ALLOCATION;
+typedef struct _DAC_LOAD_DETECTION_PS_ALLOCATION
+{
+  DAC_LOAD_DETECTION_PARAMETERS            sDacload;
+  ULONG                                    Reserved[2];// Don't set this one, allocation for EXT DAC
+}DAC_LOAD_DETECTION_PS_ALLOCATION;
 
-/****************************************************************************/
-/*  Structures used by DAC1EncoderControlTable.ctb and DAC2EncoderControlTable.ctb */
-/****************************************************************************/
-typedef struct _DAC_ENCODER_CONTROL_PARAMETERS {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucDacStandard;	/*  See definition of ATOM_DACx_xxx, For DEC3.0, bit 7 used as internal flag to indicate DAC2 (==1) or DAC1 (==0) */
-	UCHAR ucAction;		/*  0: turn off encoder */
-	/*  1: setup and turn on encoder */
-	/*  7: ATOM_ENCODER_INIT Initialize DAC */
-} DAC_ENCODER_CONTROL_PARAMETERS;
+/****************************************************************************/	
+// Structures used by DAC1EncoderControlTable.ctb and DAC2EncoderControlTable.ctb
+/****************************************************************************/	
+typedef struct _DAC_ENCODER_CONTROL_PARAMETERS 
+{
+  USHORT usPixelClock;                // in 10KHz; for bios convenient
+  UCHAR  ucDacStandard;               // See definition of ATOM_DACx_xxx, For DEC3.0, bit 7 used as internal flag to indicate DAC2 (==1) or DAC1 (==0)
+  UCHAR  ucAction;                    // 0: turn off encoder
+                                      // 1: setup and turn on encoder
+                                      // 7: ATOM_ENCODER_INIT Initialize DAC
+}DAC_ENCODER_CONTROL_PARAMETERS;
 
 #define DAC_ENCODER_CONTROL_PS_ALLOCATION  DAC_ENCODER_CONTROL_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by DIG1EncoderControlTable */
-/*                     DIG2EncoderControlTable */
-/*                     ExternalEncoderControlTable */
-/****************************************************************************/
-typedef struct _DIG_ENCODER_CONTROL_PARAMETERS {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucConfig;
-	/*  [2] Link Select: */
-	/*  =0: PHY linkA if bfLane<3 */
-	/*  =1: PHY linkB if bfLanes<3 */
-	/*  =0: PHY linkA+B if bfLanes=3 */
-	/*  [3] Transmitter Sel */
-	/*  =0: UNIPHY or PCIEPHY */
-	/*  =1: LVTMA */
-	UCHAR ucAction;		/*  =0: turn off encoder */
-	/*  =1: turn on encoder */
-	UCHAR ucEncoderMode;
-	/*  =0: DP   encoder */
-	/*  =1: LVDS encoder */
-	/*  =2: DVI  encoder */
-	/*  =3: HDMI encoder */
-	/*  =4: SDVO encoder */
-	UCHAR ucLaneNum;	/*  how many lanes to enable */
-	UCHAR ucReserved[2];
-} DIG_ENCODER_CONTROL_PARAMETERS;
+/****************************************************************************/	
+// Structures used by DIG1EncoderControlTable
+//                    DIG2EncoderControlTable
+//                    ExternalEncoderControlTable
+/****************************************************************************/	
+typedef struct _DIG_ENCODER_CONTROL_PARAMETERS
+{
+  USHORT usPixelClock;		// in 10KHz; for bios convenient
+  UCHAR  ucConfig;		  
+                            // [2] Link Select:
+                            // =0: PHY linkA if bfLane<3
+                            // =1: PHY linkB if bfLanes<3
+                            // =0: PHY linkA+B if bfLanes=3
+                            // [3] Transmitter Sel
+                            // =0: UNIPHY or PCIEPHY
+                            // =1: LVTMA 					
+  UCHAR ucAction;           // =0: turn off encoder					
+                            // =1: turn on encoder			
+  UCHAR ucEncoderMode;
+                            // =0: DP   encoder      
+                            // =1: LVDS encoder          
+                            // =2: DVI  encoder  
+                            // =3: HDMI encoder
+                            // =4: SDVO encoder
+  UCHAR ucLaneNum;          // how many lanes to enable
+  UCHAR ucReserved[2];
+}DIG_ENCODER_CONTROL_PARAMETERS;
 #define DIG_ENCODER_CONTROL_PS_ALLOCATION			  DIG_ENCODER_CONTROL_PARAMETERS
 #define EXTERNAL_ENCODER_CONTROL_PARAMETER			DIG_ENCODER_CONTROL_PARAMETERS
 
-/* ucConfig */
+//ucConfig
 #define ATOM_ENCODER_CONFIG_DPLINKRATE_MASK				0x01
 #define ATOM_ENCODER_CONFIG_DPLINKRATE_1_62GHZ		0x00
 #define ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ		0x01
@@ -539,52 +593,57 @@
 #define ATOM_ENCODER_CONFIG_LVTMA								  0x08
 #define ATOM_ENCODER_CONFIG_TRANSMITTER1				  0x00
 #define ATOM_ENCODER_CONFIG_TRANSMITTER2				  0x08
-#define ATOM_ENCODER_CONFIG_DIGB								  0x80	/*  VBIOS Internal use, outside SW should set this bit=0 */
-/*  ucAction */
-/*  ATOM_ENABLE:  Enable Encoder */
-/*  ATOM_DISABLE: Disable Encoder */
+#define ATOM_ENCODER_CONFIG_DIGB								  0x80			// VBIOS Internal use, outside SW should set this bit=0
+// ucAction
+// ATOM_ENABLE:  Enable Encoder
+// ATOM_DISABLE: Disable Encoder
 
-/* ucEncoderMode */
+//ucEncoderMode
 #define ATOM_ENCODER_MODE_DP											0
 #define ATOM_ENCODER_MODE_LVDS										1
 #define ATOM_ENCODER_MODE_DVI											2
 #define ATOM_ENCODER_MODE_HDMI										3
 #define ATOM_ENCODER_MODE_SDVO										4
+#define ATOM_ENCODER_MODE_DP_AUDIO                5
 #define ATOM_ENCODER_MODE_TV											13
 #define ATOM_ENCODER_MODE_CV											14
 #define ATOM_ENCODER_MODE_CRT											15
 
-typedef struct _ATOM_DIG_ENCODER_CONFIG_V2 {
+typedef struct _ATOM_DIG_ENCODER_CONFIG_V2
+{
 #if ATOM_BIG_ENDIAN
-	UCHAR ucReserved1:2;
-	UCHAR ucTransmitterSel:2;	/*  =0: UniphyAB, =1: UniphyCD  =2: UniphyEF */
-	UCHAR ucLinkSel:1;	/*  =0: linkA/C/E =1: linkB/D/F */
-	UCHAR ucReserved:1;
-	UCHAR ucDPLinkRate:1;	/*  =0: 1.62Ghz, =1: 2.7Ghz */
+    UCHAR ucReserved1:2;
+    UCHAR ucTransmitterSel:2;     // =0: UniphyAB, =1: UniphyCD  =2: UniphyEF
+    UCHAR ucLinkSel:1;            // =0: linkA/C/E =1: linkB/D/F
+    UCHAR ucReserved:1;
+    UCHAR ucDPLinkRate:1;         // =0: 1.62Ghz, =1: 2.7Ghz
 #else
-	UCHAR ucDPLinkRate:1;	/*  =0: 1.62Ghz, =1: 2.7Ghz */
-	UCHAR ucReserved:1;
-	UCHAR ucLinkSel:1;	/*  =0: linkA/C/E =1: linkB/D/F */
-	UCHAR ucTransmitterSel:2;	/*  =0: UniphyAB, =1: UniphyCD  =2: UniphyEF */
-	UCHAR ucReserved1:2;
+    UCHAR ucDPLinkRate:1;         // =0: 1.62Ghz, =1: 2.7Ghz
+    UCHAR ucReserved:1;
+    UCHAR ucLinkSel:1;            // =0: linkA/C/E =1: linkB/D/F
+    UCHAR ucTransmitterSel:2;     // =0: UniphyAB, =1: UniphyCD  =2: UniphyEF
+    UCHAR ucReserved1:2;
 #endif
-} ATOM_DIG_ENCODER_CONFIG_V2;
+}ATOM_DIG_ENCODER_CONFIG_V2;
 
-typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2 {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	ATOM_DIG_ENCODER_CONFIG_V2 acConfig;
-	UCHAR ucAction;
-	UCHAR ucEncoderMode;
-	/*  =0: DP   encoder */
-	/*  =1: LVDS encoder */
-	/*  =2: DVI  encoder */
-	/*  =3: HDMI encoder */
-	/*  =4: SDVO encoder */
-	UCHAR ucLaneNum;	/*  how many lanes to enable */
-	UCHAR ucReserved[2];
-} DIG_ENCODER_CONTROL_PARAMETERS_V2;
 
-/* ucConfig */
+typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2
+{
+  USHORT usPixelClock;      // in 10KHz; for bios convenient
+  ATOM_DIG_ENCODER_CONFIG_V2 acConfig;
+  UCHAR ucAction;                                       
+  UCHAR ucEncoderMode;
+                            // =0: DP   encoder      
+                            // =1: LVDS encoder          
+                            // =2: DVI  encoder  
+                            // =3: HDMI encoder
+                            // =4: SDVO encoder
+  UCHAR ucLaneNum;          // how many lanes to enable
+  UCHAR ucStatus;           // = DP_LINK_TRAINING_COMPLETE or DP_LINK_TRAINING_INCOMPLETE, only used by VBIOS with command ATOM_ENCODER_CMD_QUERY_DP_LINK_TRAINING_STATUS
+  UCHAR ucReserved;
+}DIG_ENCODER_CONTROL_PARAMETERS_V2;
+
+//ucConfig
 #define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_MASK				0x01
 #define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_1_62GHZ		  0x00
 #define ATOM_ENCODER_CONFIG_V2_DPLINKRATE_2_70GHZ		  0x01
@@ -596,58 +655,122 @@
 #define ATOM_ENCODER_CONFIG_V2_TRANSMITTER2				    0x08
 #define ATOM_ENCODER_CONFIG_V2_TRANSMITTER3				    0x10
 
-/****************************************************************************/
-/*  Structures used by UNIPHYTransmitterControlTable */
-/*                     LVTMATransmitterControlTable */
-/*                     DVOOutputControlTable */
-/****************************************************************************/
-typedef struct _ATOM_DP_VS_MODE {
-	UCHAR ucLaneSel;
-	UCHAR ucLaneSet;
-} ATOM_DP_VS_MODE;
+// ucAction:
+// ATOM_DISABLE
+// ATOM_ENABLE
+#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_START       0x08
+#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1    0x09
+#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2    0x0a
+#define ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE    0x0b
+#define ATOM_ENCODER_CMD_DP_VIDEO_OFF                 0x0c
+#define ATOM_ENCODER_CMD_DP_VIDEO_ON                  0x0d
+#define ATOM_ENCODER_CMD_QUERY_DP_LINK_TRAINING_STATUS    0x0e
+#define ATOM_ENCODER_CMD_SETUP                        0x0f
 
-typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS {
-	union {
-		USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-		USHORT usInitInfo;	/*  when init uniphy,lower 8bit is used for connector type defined in objectid.h */
-		ATOM_DP_VS_MODE asMode;	/*  DP Voltage swing mode */
+// ucStatus
+#define ATOM_ENCODER_STATUS_LINK_TRAINING_COMPLETE    0x10
+#define ATOM_ENCODER_STATUS_LINK_TRAINING_INCOMPLETE  0x00
+
+// Following function ENABLE sub-function will be used by driver when TMDS/HDMI/LVDS is used, disable function will be used by driver
+typedef struct _ATOM_DIG_ENCODER_CONFIG_V3
+{
+#if ATOM_BIG_ENDIAN
+    UCHAR ucReserved1:1;
+    UCHAR ucDigSel:3;             // =0: DIGA/B/C/D/E/F
+    UCHAR ucReserved:3;
+    UCHAR ucDPLinkRate:1;         // =0: 1.62Ghz, =1: 2.7Ghz
+#else
+    UCHAR ucDPLinkRate:1;         // =0: 1.62Ghz, =1: 2.7Ghz
+    UCHAR ucReserved:3;
+    UCHAR ucDigSel:3;             // =0: DIGA/B/C/D/E/F
+    UCHAR ucReserved1:1;
+#endif
+}ATOM_DIG_ENCODER_CONFIG_V3;
+
+#define ATOM_ENCODER_CONFIG_V3_ENCODER_SEL					  0x70
+
+
+typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V3
+{
+  USHORT usPixelClock;      // in 10KHz; for bios convenient
+  ATOM_DIG_ENCODER_CONFIG_V3 acConfig;
+  UCHAR ucAction;                              
+  UCHAR ucEncoderMode;
+                            // =0: DP   encoder      
+                            // =1: LVDS encoder          
+                            // =2: DVI  encoder  
+                            // =3: HDMI encoder
+                            // =4: SDVO encoder
+                            // =5: DP audio
+  UCHAR ucLaneNum;          // how many lanes to enable
+  UCHAR ucBitPerColor;      // only valid for DP mode when ucAction = ATOM_ENCODER_CMD_SETUP
+  UCHAR ucReserved;
+}DIG_ENCODER_CONTROL_PARAMETERS_V3;
+
+
+// define ucBitPerColor: 
+#define PANEL_BPC_UNDEFINE                               0x00
+#define PANEL_6BIT_PER_COLOR                             0x01 
+#define PANEL_8BIT_PER_COLOR                             0x02
+#define PANEL_10BIT_PER_COLOR                            0x03
+#define PANEL_12BIT_PER_COLOR                            0x04
+#define PANEL_16BIT_PER_COLOR                            0x05
+
+/****************************************************************************/	
+// Structures used by UNIPHYTransmitterControlTable
+//                    LVTMATransmitterControlTable
+//                    DVOOutputControlTable
+/****************************************************************************/	
+typedef struct _ATOM_DP_VS_MODE
+{
+  UCHAR ucLaneSel;
+  UCHAR ucLaneSet;
+}ATOM_DP_VS_MODE;
+
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS
+{
+	union
+	{
+  USHORT usPixelClock;		// in 10KHz; for bios convenient
+	USHORT usInitInfo;			// when init uniphy,lower 8bit is used for connector type defined in objectid.h
+  ATOM_DP_VS_MODE asMode; // DP Voltage swing mode
 	};
-	UCHAR ucConfig;
-	/*  [0]=0: 4 lane Link, */
-	/*     =1: 8 lane Link ( Dual Links TMDS ) */
-	/*  [1]=0: InCoherent mode */
-	/*     =1: Coherent Mode */
-	/*  [2] Link Select: */
-	/*  =0: PHY linkA   if bfLane<3 */
-	/*  =1: PHY linkB   if bfLanes<3 */
-	/*  =0: PHY linkA+B if bfLanes=3 */
-	/*  [5:4]PCIE lane Sel */
-	/*  =0: lane 0~3 or 0~7 */
-	/*  =1: lane 4~7 */
-	/*  =2: lane 8~11 or 8~15 */
-	/*  =3: lane 12~15 */
-	UCHAR ucAction;		/*  =0: turn off encoder */
-	/*  =1: turn on encoder */
-	UCHAR ucReserved[4];
-} DIG_TRANSMITTER_CONTROL_PARAMETERS;
+  UCHAR ucConfig;
+													// [0]=0: 4 lane Link,      
+													//    =1: 8 lane Link ( Dual Links TMDS ) 
+                          // [1]=0: InCoherent mode   
+													//    =1: Coherent Mode										
+													// [2] Link Select:
+  												// =0: PHY linkA   if bfLane<3
+													// =1: PHY linkB   if bfLanes<3
+		  										// =0: PHY linkA+B if bfLanes=3		
+                          // [5:4]PCIE lane Sel
+                          // =0: lane 0~3 or 0~7
+                          // =1: lane 4~7
+                          // =2: lane 8~11 or 8~15
+                          // =3: lane 12~15 
+	UCHAR ucAction;				  // =0: turn off encoder					
+	                        // =1: turn on encoder			
+  UCHAR ucReserved[4];
+}DIG_TRANSMITTER_CONTROL_PARAMETERS;
 
-#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION		DIG_TRANSMITTER_CONTROL_PARAMETERS
+#define DIG_TRANSMITTER_CONTROL_PS_ALLOCATION		DIG_TRANSMITTER_CONTROL_PARAMETERS					
 
-/* ucInitInfo */
-#define ATOM_TRAMITTER_INITINFO_CONNECTOR_MASK	0x00ff
+//ucInitInfo
+#define ATOM_TRAMITTER_INITINFO_CONNECTOR_MASK	0x00ff			
 
-/* ucConfig */
+//ucConfig 
 #define ATOM_TRANSMITTER_CONFIG_8LANE_LINK			0x01
 #define ATOM_TRANSMITTER_CONFIG_COHERENT				0x02
 #define ATOM_TRANSMITTER_CONFIG_LINK_SEL_MASK		0x04
 #define ATOM_TRANSMITTER_CONFIG_LINKA						0x00
 #define ATOM_TRANSMITTER_CONFIG_LINKB						0x04
-#define ATOM_TRANSMITTER_CONFIG_LINKA_B					0x00
+#define ATOM_TRANSMITTER_CONFIG_LINKA_B					0x00			
 #define ATOM_TRANSMITTER_CONFIG_LINKB_A					0x04
 
-#define ATOM_TRANSMITTER_CONFIG_ENCODER_SEL_MASK	0x08	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
-#define ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER		0x00	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
-#define ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER		0x08	/*  only used when ATOM_TRANSMITTER_ACTION_ENABLE */
+#define ATOM_TRANSMITTER_CONFIG_ENCODER_SEL_MASK	0x08			// only used when ATOM_TRANSMITTER_ACTION_ENABLE
+#define ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER		0x00				// only used when ATOM_TRANSMITTER_ACTION_ENABLE
+#define ATOM_TRANSMITTER_CONFIG_DIG2_ENCODER		0x08				// only used when ATOM_TRANSMITTER_ACTION_ENABLE
 
 #define ATOM_TRANSMITTER_CONFIG_CLKSRC_MASK			0x30
 #define ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL			0x00
@@ -661,7 +784,7 @@
 #define ATOM_TRANSMITTER_CONFIG_LANE_8_15				0x80
 #define ATOM_TRANSMITTER_CONFIG_LANE_12_15			0xc0
 
-/* ucAction */
+//ucAction
 #define ATOM_TRANSMITTER_ACTION_DISABLE					       0
 #define ATOM_TRANSMITTER_ACTION_ENABLE					       1
 #define ATOM_TRANSMITTER_ACTION_LCD_BLOFF				       2
@@ -674,93 +797,168 @@
 #define ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT		       9
 #define ATOM_TRANSMITTER_ACTION_SETUP						       10
 #define ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH           11
+#define ATOM_TRANSMITTER_ACTION_POWER_ON               12
+#define ATOM_TRANSMITTER_ACTION_POWER_OFF              13
 
-/*  Following are used for DigTransmitterControlTable ver1.2 */
-typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V2 {
+// Following are used for DigTransmitterControlTable ver1.2
+typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V2
+{
 #if ATOM_BIG_ENDIAN
-	UCHAR ucTransmitterSel:2;	/* bit7:6: =0 Dig Transmitter 1 ( Uniphy AB ) */
-	/*         =1 Dig Transmitter 2 ( Uniphy CD ) */
-	/*         =2 Dig Transmitter 3 ( Uniphy EF ) */
-	UCHAR ucReserved:1;
-	UCHAR fDPConnector:1;	/* bit4=0: DP connector  =1: None DP connector */
-	UCHAR ucEncoderSel:1;	/* bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 ) */
-	UCHAR ucLinkSel:1;	/* bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E */
-	/*     =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F */
+  UCHAR ucTransmitterSel:2;         //bit7:6: =0 Dig Transmitter 1 ( Uniphy AB )
+                                    //        =1 Dig Transmitter 2 ( Uniphy CD )
+                                    //        =2 Dig Transmitter 3 ( Uniphy EF )
+  UCHAR ucReserved:1;               
+  UCHAR fDPConnector:1;             //bit4=0: DP connector  =1: None DP connector
+  UCHAR ucEncoderSel:1;             //bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 )
+  UCHAR ucLinkSel:1;                //bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E
+                                    //    =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F
 
-	UCHAR fCoherentMode:1;	/* bit1=1: Coherent Mode ( for DVI/HDMI mode ) */
-	UCHAR fDualLinkConnector:1;	/* bit0=1: Dual Link DVI connector */
+  UCHAR fCoherentMode:1;            //bit1=1: Coherent Mode ( for DVI/HDMI mode )
+  UCHAR fDualLinkConnector:1;       //bit0=1: Dual Link DVI connector
 #else
-	UCHAR fDualLinkConnector:1;	/* bit0=1: Dual Link DVI connector */
-	UCHAR fCoherentMode:1;	/* bit1=1: Coherent Mode ( for DVI/HDMI mode ) */
-	UCHAR ucLinkSel:1;	/* bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E */
-	/*     =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F */
-	UCHAR ucEncoderSel:1;	/* bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 ) */
-	UCHAR fDPConnector:1;	/* bit4=0: DP connector  =1: None DP connector */
-	UCHAR ucReserved:1;
-	UCHAR ucTransmitterSel:2;	/* bit7:6: =0 Dig Transmitter 1 ( Uniphy AB ) */
-	/*         =1 Dig Transmitter 2 ( Uniphy CD ) */
-	/*         =2 Dig Transmitter 3 ( Uniphy EF ) */
+  UCHAR fDualLinkConnector:1;       //bit0=1: Dual Link DVI connector
+  UCHAR fCoherentMode:1;            //bit1=1: Coherent Mode ( for DVI/HDMI mode )
+  UCHAR ucLinkSel:1;                //bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E
+                                    //    =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F
+  UCHAR ucEncoderSel:1;             //bit3=0: Data/Clk path source from DIGA( DIG inst0 ). =1: Data/clk path source from DIGB ( DIG inst1 )
+  UCHAR fDPConnector:1;             //bit4=0: DP connector  =1: None DP connector
+  UCHAR ucReserved:1;               
+  UCHAR ucTransmitterSel:2;         //bit7:6: =0 Dig Transmitter 1 ( Uniphy AB )
+                                    //        =1 Dig Transmitter 2 ( Uniphy CD )
+                                    //        =2 Dig Transmitter 3 ( Uniphy EF )
 #endif
-} ATOM_DIG_TRANSMITTER_CONFIG_V2;
+}ATOM_DIG_TRANSMITTER_CONFIG_V2;
 
-/* ucConfig */
-/* Bit0 */
+//ucConfig 
+//Bit0
 #define ATOM_TRANSMITTER_CONFIG_V2_DUAL_LINK_CONNECTOR			0x01
 
-/* Bit1 */
+//Bit1
 #define ATOM_TRANSMITTER_CONFIG_V2_COHERENT				          0x02
 
-/* Bit2 */
+//Bit2
 #define ATOM_TRANSMITTER_CONFIG_V2_LINK_SEL_MASK		        0x04
-#define ATOM_TRANSMITTER_CONFIG_V2_LINKA			            0x00
+#define ATOM_TRANSMITTER_CONFIG_V2_LINKA  			            0x00
 #define ATOM_TRANSMITTER_CONFIG_V2_LINKB				            0x04
 
-/*  Bit3 */
+// Bit3
 #define ATOM_TRANSMITTER_CONFIG_V2_ENCODER_SEL_MASK	        0x08
-#define ATOM_TRANSMITTER_CONFIG_V2_DIG1_ENCODER		          0x00	/*  only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP */
-#define ATOM_TRANSMITTER_CONFIG_V2_DIG2_ENCODER		          0x08	/*  only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP */
+#define ATOM_TRANSMITTER_CONFIG_V2_DIG1_ENCODER		          0x00				// only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP
+#define ATOM_TRANSMITTER_CONFIG_V2_DIG2_ENCODER		          0x08				// only used when ucAction == ATOM_TRANSMITTER_ACTION_ENABLE or ATOM_TRANSMITTER_ACTION_SETUP
 
-/*  Bit4 */
+// Bit4
 #define ATOM_TRASMITTER_CONFIG_V2_DP_CONNECTOR			        0x10
 
-/*  Bit7:6 */
+// Bit7:6
 #define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER_SEL_MASK     0xC0
-#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER1			0x00	/* AB */
-#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER2			0x40	/* CD */
-#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER3			0x80	/* EF */
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER1           	0x00	//AB
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER2           	0x40	//CD
+#define ATOM_TRANSMITTER_CONFIG_V2_TRANSMITTER3           	0x80	//EF
 
-typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 {
-	union {
-		USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-		USHORT usInitInfo;	/*  when init uniphy,lower 8bit is used for connector type defined in objectid.h */
-		ATOM_DP_VS_MODE asMode;	/*  DP Voltage swing mode */
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V2
+{
+	union
+	{
+  USHORT usPixelClock;		// in 10KHz; for bios convenient
+	USHORT usInitInfo;			// when init uniphy,lower 8bit is used for connector type defined in objectid.h
+  ATOM_DP_VS_MODE asMode; // DP Voltage swing mode
 	};
-	ATOM_DIG_TRANSMITTER_CONFIG_V2 acConfig;
-	UCHAR ucAction;		/*  define as ATOM_TRANSMITER_ACTION_XXX */
-	UCHAR ucReserved[4];
-} DIG_TRANSMITTER_CONTROL_PARAMETERS_V2;
+  ATOM_DIG_TRANSMITTER_CONFIG_V2 acConfig;
+	UCHAR ucAction;				  // define as ATOM_TRANSMITER_ACTION_XXX
+  UCHAR ucReserved[4];
+}DIG_TRANSMITTER_CONTROL_PARAMETERS_V2;
 
-/****************************************************************************/
-/*  Structures used by DAC1OuputControlTable */
-/*                     DAC2OuputControlTable */
-/*                     LVTMAOutputControlTable  (Before DEC30) */
-/*                     TMDSAOutputControlTable  (Before DEC30) */
-/****************************************************************************/
-typedef struct _DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS {
-	UCHAR ucAction;		/*  Possible input:ATOM_ENABLE||ATOMDISABLE */
-	/*  When the display is LCD, in addition to above: */
-	/*  ATOM_LCD_BLOFF|| ATOM_LCD_BLON ||ATOM_LCD_BL_BRIGHTNESS_CONTROL||ATOM_LCD_SELFTEST_START|| */
-	/*  ATOM_LCD_SELFTEST_STOP */
+typedef struct _ATOM_DIG_TRANSMITTER_CONFIG_V3
+{
+#if ATOM_BIG_ENDIAN
+  UCHAR ucTransmitterSel:2;         //bit7:6: =0 Dig Transmitter 1 ( Uniphy AB )
+                                    //        =1 Dig Transmitter 2 ( Uniphy CD )
+                                    //        =2 Dig Transmitter 3 ( Uniphy EF )
+  UCHAR ucRefClkSource:2;           //bit5:4: PPLL1 =0, PPLL2=1, EXT_CLK=2
+  UCHAR ucEncoderSel:1;             //bit3=0: Data/Clk path source from DIGA/C/E. =1: Data/clk path source from DIGB/D/F
+  UCHAR ucLinkSel:1;                //bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E
+                                    //    =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F
+  UCHAR fCoherentMode:1;            //bit1=1: Coherent Mode ( for DVI/HDMI mode )
+  UCHAR fDualLinkConnector:1;       //bit0=1: Dual Link DVI connector
+#else
+  UCHAR fDualLinkConnector:1;       //bit0=1: Dual Link DVI connector
+  UCHAR fCoherentMode:1;            //bit1=1: Coherent Mode ( for DVI/HDMI mode )
+  UCHAR ucLinkSel:1;                //bit2=0: Uniphy LINKA or C or E when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is A or C or E
+                                    //    =1: Uniphy LINKB or D or F when fDualLinkConnector=0. when fDualLinkConnector=1, it means master link of dual link is B or D or F
+  UCHAR ucEncoderSel:1;             //bit3=0: Data/Clk path source from DIGA/C/E. =1: Data/clk path source from DIGB/D/F
+  UCHAR ucRefClkSource:2;           //bit5:4: PPLL1 =0, PPLL2=1, EXT_CLK=2
+  UCHAR ucTransmitterSel:2;         //bit7:6: =0 Dig Transmitter 1 ( Uniphy AB )
+                                    //        =1 Dig Transmitter 2 ( Uniphy CD )
+                                    //        =2 Dig Transmitter 3 ( Uniphy EF )
+#endif
+}ATOM_DIG_TRANSMITTER_CONFIG_V3;
 
-	UCHAR aucPadding[3];	/*  padding to DWORD aligned */
-} DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS;
+typedef struct _DIG_TRANSMITTER_CONTROL_PARAMETERS_V3
+{
+	union
+	{
+    USHORT usPixelClock;		// in 10KHz; for bios convenient
+	  USHORT usInitInfo;			// when init uniphy,lower 8bit is used for connector type defined in objectid.h
+    ATOM_DP_VS_MODE asMode; // DP Voltage swing mode
+	};
+  ATOM_DIG_TRANSMITTER_CONFIG_V3 acConfig;
+	UCHAR ucAction;				    // define as ATOM_TRANSMITER_ACTION_XXX
+  UCHAR ucLaneNum;
+  UCHAR ucReserved[3];
+}DIG_TRANSMITTER_CONTROL_PARAMETERS_V3;
+
+//ucConfig 
+//Bit0
+#define ATOM_TRANSMITTER_CONFIG_V3_DUAL_LINK_CONNECTOR			0x01
+
+//Bit1
+#define ATOM_TRANSMITTER_CONFIG_V3_COHERENT				          0x02
+
+//Bit2
+#define ATOM_TRANSMITTER_CONFIG_V3_LINK_SEL_MASK		        0x04
+#define ATOM_TRANSMITTER_CONFIG_V3_LINKA  			            0x00
+#define ATOM_TRANSMITTER_CONFIG_V3_LINKB				            0x04
+
+// Bit3
+#define ATOM_TRANSMITTER_CONFIG_V3_ENCODER_SEL_MASK	        0x08
+#define ATOM_TRANSMITTER_CONFIG_V3_DIG1_ENCODER		          0x00
+#define ATOM_TRANSMITTER_CONFIG_V3_DIG2_ENCODER		          0x08
+
+// Bit5:4
+#define ATOM_TRASMITTER_CONFIG_V3_REFCLK_SEL_MASK 	        0x30
+#define ATOM_TRASMITTER_CONFIG_V3_P1PLL          		        0x00
+#define ATOM_TRASMITTER_CONFIG_V3_P2PLL		                  0x10
+#define ATOM_TRASMITTER_CONFIG_V3_REFCLK_SRC_EXT            0x20
+
+// Bit7:6
+#define ATOM_TRANSMITTER_CONFIG_V3_TRANSMITTER_SEL_MASK     0xC0
+#define ATOM_TRANSMITTER_CONFIG_V3_TRANSMITTER1           	0x00	//AB
+#define ATOM_TRANSMITTER_CONFIG_V3_TRANSMITTER2           	0x40	//CD
+#define ATOM_TRANSMITTER_CONFIG_V3_TRANSMITTER3           	0x80	//EF
+
+/****************************************************************************/	
+// Structures used by DAC1OuputControlTable
+//                    DAC2OuputControlTable
+//                    LVTMAOutputControlTable  (Before DEC30)
+//                    TMDSAOutputControlTable  (Before DEC30)
+/****************************************************************************/	
+typedef struct _DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+{
+  UCHAR  ucAction;                    // Possible input:ATOM_ENABLE||ATOMDISABLE
+                                      // When the display is LCD, in addition to above:
+                                      // ATOM_LCD_BLOFF|| ATOM_LCD_BLON ||ATOM_LCD_BL_BRIGHTNESS_CONTROL||ATOM_LCD_SELFTEST_START||
+                                      // ATOM_LCD_SELFTEST_STOP
+                                      
+  UCHAR  aucPadding[3];               // padding to DWORD aligned
+}DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS;
 
 #define DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
 
-#define CRT1_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+
+#define CRT1_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS 
 #define CRT1_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
 
-#define CRT2_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
+#define CRT2_OUTPUT_CONTROL_PARAMETERS     DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS 
 #define CRT2_OUTPUT_CONTROL_PS_ALLOCATION  DISPLAY_DEVICE_OUTPUT_CONTROL_PS_ALLOCATION
 
 #define CV1_OUTPUT_CONTROL_PARAMETERS      DISPLAY_DEVICE_OUTPUT_CONTROL_PARAMETERS
@@ -782,397 +980,550 @@
 #define DVO_OUTPUT_CONTROL_PS_ALLOCATION   DIG_TRANSMITTER_CONTROL_PS_ALLOCATION
 #define DVO_OUTPUT_CONTROL_PARAMETERS_V3	 DIG_TRANSMITTER_CONTROL_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by BlankCRTCTable */
-/****************************************************************************/
-typedef struct _BLANK_CRTC_PARAMETERS {
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucBlanking;	/*  ATOM_BLANKING or ATOM_BLANKINGOFF */
-	USHORT usBlackColorRCr;
-	USHORT usBlackColorGY;
-	USHORT usBlackColorBCb;
-} BLANK_CRTC_PARAMETERS;
+/****************************************************************************/	
+// Structures used by BlankCRTCTable
+/****************************************************************************/	
+typedef struct _BLANK_CRTC_PARAMETERS
+{
+  UCHAR  ucCRTC;                    	// ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR  ucBlanking;                  // ATOM_BLANKING or ATOM_BLANKINGOFF
+  USHORT usBlackColorRCr;
+  USHORT usBlackColorGY;
+  USHORT usBlackColorBCb;
+}BLANK_CRTC_PARAMETERS;
 #define BLANK_CRTC_PS_ALLOCATION    BLANK_CRTC_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by EnableCRTCTable */
-/*                     EnableCRTCMemReqTable */
-/*                     UpdateCRTC_DoubleBufferRegistersTable */
-/****************************************************************************/
-typedef struct _ENABLE_CRTC_PARAMETERS {
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucPadding[2];
-} ENABLE_CRTC_PARAMETERS;
+/****************************************************************************/	
+// Structures used by EnableCRTCTable
+//                    EnableCRTCMemReqTable
+//                    UpdateCRTC_DoubleBufferRegistersTable
+/****************************************************************************/	
+typedef struct _ENABLE_CRTC_PARAMETERS
+{
+  UCHAR ucCRTC;                    	  // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR ucEnable;                     // ATOM_ENABLE or ATOM_DISABLE 
+  UCHAR ucPadding[2];
+}ENABLE_CRTC_PARAMETERS;
 #define ENABLE_CRTC_PS_ALLOCATION   ENABLE_CRTC_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by SetCRTC_OverScanTable */
-/****************************************************************************/
-typedef struct _SET_CRTC_OVERSCAN_PARAMETERS {
-	USHORT usOverscanRight;	/*  right */
-	USHORT usOverscanLeft;	/*  left */
-	USHORT usOverscanBottom;	/*  bottom */
-	USHORT usOverscanTop;	/*  top */
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucPadding[3];
-} SET_CRTC_OVERSCAN_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SetCRTC_OverScanTable
+/****************************************************************************/	
+typedef struct _SET_CRTC_OVERSCAN_PARAMETERS
+{
+  USHORT usOverscanRight;             // right
+  USHORT usOverscanLeft;              // left
+  USHORT usOverscanBottom;            // bottom
+  USHORT usOverscanTop;               // top
+  UCHAR  ucCRTC;                      // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR  ucPadding[3];
+}SET_CRTC_OVERSCAN_PARAMETERS;
 #define SET_CRTC_OVERSCAN_PS_ALLOCATION  SET_CRTC_OVERSCAN_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by SetCRTC_ReplicationTable */
-/****************************************************************************/
-typedef struct _SET_CRTC_REPLICATION_PARAMETERS {
-	UCHAR ucH_Replication;	/*  horizontal replication */
-	UCHAR ucV_Replication;	/*  vertical replication */
-	UCHAR usCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucPadding;
-} SET_CRTC_REPLICATION_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SetCRTC_ReplicationTable
+/****************************************************************************/	
+typedef struct _SET_CRTC_REPLICATION_PARAMETERS
+{
+  UCHAR ucH_Replication;              // horizontal replication
+  UCHAR ucV_Replication;              // vertical replication
+  UCHAR usCRTC;                       // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR ucPadding;
+}SET_CRTC_REPLICATION_PARAMETERS;
 #define SET_CRTC_REPLICATION_PS_ALLOCATION  SET_CRTC_REPLICATION_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by SelectCRTC_SourceTable */
-/****************************************************************************/
-typedef struct _SELECT_CRTC_SOURCE_PARAMETERS {
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucDevice;		/*  ATOM_DEVICE_CRT1|ATOM_DEVICE_CRT2|.... */
-	UCHAR ucPadding[2];
-} SELECT_CRTC_SOURCE_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SelectCRTC_SourceTable
+/****************************************************************************/	
+typedef struct _SELECT_CRTC_SOURCE_PARAMETERS
+{
+  UCHAR ucCRTC;                    	  // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR ucDevice;                     // ATOM_DEVICE_CRT1|ATOM_DEVICE_CRT2|....
+  UCHAR ucPadding[2];
+}SELECT_CRTC_SOURCE_PARAMETERS;
 #define SELECT_CRTC_SOURCE_PS_ALLOCATION  SELECT_CRTC_SOURCE_PARAMETERS
 
-typedef struct _SELECT_CRTC_SOURCE_PARAMETERS_V2 {
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucEncoderID;	/*  DAC1/DAC2/TVOUT/DIG1/DIG2/DVO */
-	UCHAR ucEncodeMode;	/*  Encoding mode, only valid when using DIG1/DIG2/DVO */
-	UCHAR ucPadding;
-} SELECT_CRTC_SOURCE_PARAMETERS_V2;
+typedef struct _SELECT_CRTC_SOURCE_PARAMETERS_V2
+{
+  UCHAR ucCRTC;                    	  // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR ucEncoderID;                  // DAC1/DAC2/TVOUT/DIG1/DIG2/DVO
+  UCHAR ucEncodeMode;									// Encoding mode, only valid when using DIG1/DIG2/DVO
+  UCHAR ucPadding;
+}SELECT_CRTC_SOURCE_PARAMETERS_V2;
 
-/* ucEncoderID */
-/* #define ASIC_INT_DAC1_ENCODER_ID                                              0x00 */
-/* #define ASIC_INT_TV_ENCODER_ID                                                                        0x02 */
-/* #define ASIC_INT_DIG1_ENCODER_ID                                                              0x03 */
-/* #define ASIC_INT_DAC2_ENCODER_ID                                                              0x04 */
-/* #define ASIC_EXT_TV_ENCODER_ID                                                                        0x06 */
-/* #define ASIC_INT_DVO_ENCODER_ID                                                                       0x07 */
-/* #define ASIC_INT_DIG2_ENCODER_ID                                                              0x09 */
-/* #define ASIC_EXT_DIG_ENCODER_ID                                                                       0x05 */
+//ucEncoderID
+//#define ASIC_INT_DAC1_ENCODER_ID    						0x00 
+//#define ASIC_INT_TV_ENCODER_ID									0x02
+//#define ASIC_INT_DIG1_ENCODER_ID								0x03
+//#define ASIC_INT_DAC2_ENCODER_ID								0x04
+//#define ASIC_EXT_TV_ENCODER_ID									0x06
+//#define ASIC_INT_DVO_ENCODER_ID									0x07
+//#define ASIC_INT_DIG2_ENCODER_ID								0x09
+//#define ASIC_EXT_DIG_ENCODER_ID									0x05
 
-/* ucEncodeMode */
-/* #define ATOM_ENCODER_MODE_DP                                                                          0 */
-/* #define ATOM_ENCODER_MODE_LVDS                                                                        1 */
-/* #define ATOM_ENCODER_MODE_DVI                                                                         2 */
-/* #define ATOM_ENCODER_MODE_HDMI                                                                        3 */
-/* #define ATOM_ENCODER_MODE_SDVO                                                                        4 */
-/* #define ATOM_ENCODER_MODE_TV                                                                          13 */
-/* #define ATOM_ENCODER_MODE_CV                                                                          14 */
-/* #define ATOM_ENCODER_MODE_CRT                                                                         15 */
+//ucEncodeMode
+//#define ATOM_ENCODER_MODE_DP										0
+//#define ATOM_ENCODER_MODE_LVDS									1
+//#define ATOM_ENCODER_MODE_DVI										2
+//#define ATOM_ENCODER_MODE_HDMI									3
+//#define ATOM_ENCODER_MODE_SDVO									4
+//#define ATOM_ENCODER_MODE_TV										13
+//#define ATOM_ENCODER_MODE_CV										14
+//#define ATOM_ENCODER_MODE_CRT										15
 
-/****************************************************************************/
-/*  Structures used by SetPixelClockTable */
-/*                     GetPixelClockTable */
-/****************************************************************************/
-/* Major revision=1., Minor revision=1 */
-typedef struct _PIXEL_CLOCK_PARAMETERS {
-	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
-	/*  0 means disable PPLL */
-	USHORT usRefDiv;	/*  Reference divider */
-	USHORT usFbDiv;		/*  feedback divider */
-	UCHAR ucPostDiv;	/*  post divider */
-	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
-	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
-	UCHAR ucRefDivSrc;	/*  ATOM_PJITTER or ATO_NONPJITTER */
-	UCHAR ucCRTC;		/*  Which CRTC uses this Ppll */
-	UCHAR ucPadding;
-} PIXEL_CLOCK_PARAMETERS;
+/****************************************************************************/	
+// Structures used by SetPixelClockTable
+//                    GetPixelClockTable 
+/****************************************************************************/	
+//Major revision=1., Minor revision=1
+typedef struct _PIXEL_CLOCK_PARAMETERS
+{
+  USHORT usPixelClock;                // in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div)
+                                      // 0 means disable PPLL
+  USHORT usRefDiv;                    // Reference divider
+  USHORT usFbDiv;                     // feedback divider
+  UCHAR  ucPostDiv;                   // post divider	
+  UCHAR  ucFracFbDiv;                 // fractional feedback divider
+  UCHAR  ucPpll;                      // ATOM_PPLL1 or ATOM_PPL2
+  UCHAR  ucRefDivSrc;                 // ATOM_PJITTER or ATO_NONPJITTER
+  UCHAR  ucCRTC;                      // Which CRTC uses this Ppll
+  UCHAR  ucPadding;
+}PIXEL_CLOCK_PARAMETERS;
 
-/* Major revision=1., Minor revision=2, add ucMiscIfno */
-/* ucMiscInfo: */
+//Major revision=1., Minor revision=2, add ucMiscIfno
+//ucMiscInfo:
 #define MISC_FORCE_REPROG_PIXEL_CLOCK 0x1
 #define MISC_DEVICE_INDEX_MASK        0xF0
 #define MISC_DEVICE_INDEX_SHIFT       4
 
-typedef struct _PIXEL_CLOCK_PARAMETERS_V2 {
-	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
-	/*  0 means disable PPLL */
-	USHORT usRefDiv;	/*  Reference divider */
-	USHORT usFbDiv;		/*  feedback divider */
-	UCHAR ucPostDiv;	/*  post divider */
-	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
-	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
-	UCHAR ucRefDivSrc;	/*  ATOM_PJITTER or ATO_NONPJITTER */
-	UCHAR ucCRTC;		/*  Which CRTC uses this Ppll */
-	UCHAR ucMiscInfo;	/*  Different bits for different purpose, bit [7:4] as device index, bit[0]=Force prog */
-} PIXEL_CLOCK_PARAMETERS_V2;
+typedef struct _PIXEL_CLOCK_PARAMETERS_V2
+{
+  USHORT usPixelClock;                // in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div)
+                                      // 0 means disable PPLL
+  USHORT usRefDiv;                    // Reference divider
+  USHORT usFbDiv;                     // feedback divider
+  UCHAR  ucPostDiv;                   // post divider	
+  UCHAR  ucFracFbDiv;                 // fractional feedback divider
+  UCHAR  ucPpll;                      // ATOM_PPLL1 or ATOM_PPL2
+  UCHAR  ucRefDivSrc;                 // ATOM_PJITTER or ATO_NONPJITTER
+  UCHAR  ucCRTC;                      // Which CRTC uses this Ppll
+  UCHAR  ucMiscInfo;                  // Different bits for different purpose, bit [7:4] as device index, bit[0]=Force prog
+}PIXEL_CLOCK_PARAMETERS_V2;
 
-/* Major revision=1., Minor revision=3, structure/definition change */
-/* ucEncoderMode: */
-/* ATOM_ENCODER_MODE_DP */
-/* ATOM_ENOCDER_MODE_LVDS */
-/* ATOM_ENOCDER_MODE_DVI */
-/* ATOM_ENOCDER_MODE_HDMI */
-/* ATOM_ENOCDER_MODE_SDVO */
-/* ATOM_ENCODER_MODE_TV                                                                          13 */
-/* ATOM_ENCODER_MODE_CV                                                                          14 */
-/* ATOM_ENCODER_MODE_CRT                                                                         15 */
+//Major revision=1., Minor revision=3, structure/definition change
+//ucEncoderMode:
+//ATOM_ENCODER_MODE_DP
+//ATOM_ENOCDER_MODE_LVDS
+//ATOM_ENOCDER_MODE_DVI
+//ATOM_ENOCDER_MODE_HDMI
+//ATOM_ENOCDER_MODE_SDVO
+//ATOM_ENCODER_MODE_TV										13
+//ATOM_ENCODER_MODE_CV										14
+//ATOM_ENCODER_MODE_CRT										15
 
-/* ucDVOConfig */
-/* #define DVO_ENCODER_CONFIG_RATE_SEL                                                   0x01 */
-/* #define DVO_ENCODER_CONFIG_DDR_SPEED                                          0x00 */
-/* #define DVO_ENCODER_CONFIG_SDR_SPEED                                          0x01 */
-/* #define DVO_ENCODER_CONFIG_OUTPUT_SEL                                         0x0c */
-/* #define DVO_ENCODER_CONFIG_LOW12BIT                                                   0x00 */
-/* #define DVO_ENCODER_CONFIG_UPPER12BIT                                         0x04 */
-/* #define DVO_ENCODER_CONFIG_24BIT                                                              0x08 */
+//ucDVOConfig
+//#define DVO_ENCODER_CONFIG_RATE_SEL							0x01
+//#define DVO_ENCODER_CONFIG_DDR_SPEED						0x00
+//#define DVO_ENCODER_CONFIG_SDR_SPEED						0x01
+//#define DVO_ENCODER_CONFIG_OUTPUT_SEL						0x0c
+//#define DVO_ENCODER_CONFIG_LOW12BIT							0x00
+//#define DVO_ENCODER_CONFIG_UPPER12BIT						0x04
+//#define DVO_ENCODER_CONFIG_24BIT								0x08
 
-/* ucMiscInfo: also changed, see below */
+//ucMiscInfo: also changed, see below
 #define PIXEL_CLOCK_MISC_FORCE_PROG_PPLL						0x01
 #define PIXEL_CLOCK_MISC_VGA_MODE										0x02
 #define PIXEL_CLOCK_MISC_CRTC_SEL_MASK							0x04
 #define PIXEL_CLOCK_MISC_CRTC_SEL_CRTC1							0x00
 #define PIXEL_CLOCK_MISC_CRTC_SEL_CRTC2							0x04
 #define PIXEL_CLOCK_MISC_USE_ENGINE_FOR_DISPCLK			0x08
+#define PIXEL_CLOCK_MISC_REF_DIV_SRC                    0x10
+// V1.4 for RoadRunner
+#define PIXEL_CLOCK_V4_MISC_SS_ENABLE               0x10
+#define PIXEL_CLOCK_V4_MISC_COHERENT_MODE           0x20
 
-typedef struct _PIXEL_CLOCK_PARAMETERS_V3 {
-	USHORT usPixelClock;	/*  in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div) */
-	/*  0 means disable PPLL. For VGA PPLL,make sure this value is not 0. */
-	USHORT usRefDiv;	/*  Reference divider */
-	USHORT usFbDiv;		/*  feedback divider */
-	UCHAR ucPostDiv;	/*  post divider */
-	UCHAR ucFracFbDiv;	/*  fractional feedback divider */
-	UCHAR ucPpll;		/*  ATOM_PPLL1 or ATOM_PPL2 */
-	UCHAR ucTransmitterId;	/*  graphic encoder id defined in objectId.h */
-	union {
-		UCHAR ucEncoderMode;	/*  encoder type defined as ATOM_ENCODER_MODE_DP/DVI/HDMI/ */
-		UCHAR ucDVOConfig;	/*  when use DVO, need to know SDR/DDR, 12bit or 24bit */
+typedef struct _PIXEL_CLOCK_PARAMETERS_V3
+{
+  USHORT usPixelClock;                // in 10kHz unit; for bios convenient = (RefClk*FB_Div)/(Ref_Div*Post_Div)
+                                      // 0 means disable PPLL. For VGA PPLL,make sure this value is not 0.
+  USHORT usRefDiv;                    // Reference divider
+  USHORT usFbDiv;                     // feedback divider
+  UCHAR  ucPostDiv;                   // post divider	
+  UCHAR  ucFracFbDiv;                 // fractional feedback divider
+  UCHAR  ucPpll;                      // ATOM_PPLL1 or ATOM_PPL2
+  UCHAR  ucTransmitterId;             // graphic encoder id defined in objectId.h
+	union
+	{
+  UCHAR  ucEncoderMode;               // encoder type defined as ATOM_ENCODER_MODE_DP/DVI/HDMI/
+	UCHAR  ucDVOConfig;									// when use DVO, need to know SDR/DDR, 12bit or 24bit
 	};
-	UCHAR ucMiscInfo;	/*  bit[0]=Force program, bit[1]= set pclk for VGA, b[2]= CRTC sel */
-	/*  bit[3]=0:use PPLL for dispclk source, =1: use engine clock for dispclock source */
-} PIXEL_CLOCK_PARAMETERS_V3;
+  UCHAR  ucMiscInfo;                  // bit[0]=Force program, bit[1]= set pclk for VGA, b[2]= CRTC sel
+                                      // bit[3]=0:use PPLL for dispclk source, =1: use engine clock for dispclock source
+                                      // bit[4]=0:use XTALIN as the source of reference divider,=1 use the pre-defined clock as the source of reference divider
+}PIXEL_CLOCK_PARAMETERS_V3;
 
 #define PIXEL_CLOCK_PARAMETERS_LAST			PIXEL_CLOCK_PARAMETERS_V2
 #define GET_PIXEL_CLOCK_PS_ALLOCATION		PIXEL_CLOCK_PARAMETERS_LAST
 
-/****************************************************************************/
-/*  Structures used by AdjustDisplayPllTable */
-/****************************************************************************/
-typedef struct _ADJUST_DISPLAY_PLL_PARAMETERS {
+typedef struct _PIXEL_CLOCK_PARAMETERS_V5
+{
+  UCHAR  ucCRTC;             // ATOM_CRTC1~6, indicate the CRTC controller to 
+                             // drive the pixel clock. not used for DCPLL case.
+  union{
+  UCHAR  ucReserved;
+  UCHAR  ucFracFbDiv;        // [gphan] temporary to prevent build problem.  remove it after driver code is changed.
+  };
+  USHORT usPixelClock;       // target the pixel clock to drive the CRTC timing
+                             // 0 means disable PPLL/DCPLL. 
+  USHORT usFbDiv;            // feedback divider integer part. 
+  UCHAR  ucPostDiv;          // post divider. 
+  UCHAR  ucRefDiv;           // Reference divider
+  UCHAR  ucPpll;             // ATOM_PPLL1/ATOM_PPLL2/ATOM_DCPLL
+  UCHAR  ucTransmitterID;    // ASIC encoder id defined in objectId.h, 
+                             // indicate which graphic encoder will be used. 
+  UCHAR  ucEncoderMode;      // Encoder mode: 
+  UCHAR  ucMiscInfo;         // bit[0]= Force program PPLL 
+                             // bit[1]= when VGA timing is used. 
+                             // bit[3:2]= HDMI panel bit depth: =0: 24bpp =1:30bpp, =2:32bpp
+                             // bit[4]= RefClock source for PPLL. 
+                             // =0: XTLAIN( default mode )
+	                           // =1: other external clock source, which is pre-defined
+                             //     by VBIOS depend on the feature required.
+                             // bit[7:5]: reserved.
+  ULONG  ulFbDivDecFrac;     // 20 bit feedback divider decimal fraction part, range from 1~999999 ( 0.000001 to 0.999999 )
+
+}PIXEL_CLOCK_PARAMETERS_V5;
+
+#define PIXEL_CLOCK_V5_MISC_FORCE_PROG_PPLL					0x01
+#define PIXEL_CLOCK_V5_MISC_VGA_MODE								0x02
+#define PIXEL_CLOCK_V5_MISC_HDMI_BPP_MASK           0x0c
+#define PIXEL_CLOCK_V5_MISC_HDMI_24BPP              0x00
+#define PIXEL_CLOCK_V5_MISC_HDMI_30BPP              0x04
+#define PIXEL_CLOCK_V5_MISC_HDMI_32BPP              0x08
+#define PIXEL_CLOCK_V5_MISC_REF_DIV_SRC             0x10
+
+typedef struct _GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V2
+{
+  PIXEL_CLOCK_PARAMETERS_V3 sDispClkInput;
+}GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V2;
+
+typedef struct _GET_DISP_PLL_STATUS_OUTPUT_PARAMETERS_V2
+{
+  UCHAR  ucStatus;
+  UCHAR  ucRefDivSrc;                 // =1: reference clock source from XTALIN, =0: source from PCIE ref clock
+  UCHAR  ucReserved[2];
+}GET_DISP_PLL_STATUS_OUTPUT_PARAMETERS_V2;
+
+typedef struct _GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3
+{
+  PIXEL_CLOCK_PARAMETERS_V5 sDispClkInput;
+}GET_DISP_PLL_STATUS_INPUT_PARAMETERS_V3;
+
+/****************************************************************************/	
+// Structures used by AdjustDisplayPllTable
+/****************************************************************************/	
+typedef struct _ADJUST_DISPLAY_PLL_PARAMETERS
+{
 	USHORT usPixelClock;
 	UCHAR ucTransmitterID;
 	UCHAR ucEncodeMode;
-	union {
-		UCHAR ucDVOConfig;	/* if DVO, need passing link rate and output 12bitlow or 24bit */
-		UCHAR ucConfig;	/* if none DVO, not defined yet */
+	union
+	{
+		UCHAR ucDVOConfig;									//if DVO, need passing link rate and output 12bitlow or 24bit
+		UCHAR ucConfig;											//if none DVO, not defined yet
 	};
 	UCHAR ucReserved[3];
-} ADJUST_DISPLAY_PLL_PARAMETERS;
+}ADJUST_DISPLAY_PLL_PARAMETERS;
 
 #define ADJUST_DISPLAY_CONFIG_SS_ENABLE       0x10
-
 #define ADJUST_DISPLAY_PLL_PS_ALLOCATION			ADJUST_DISPLAY_PLL_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by EnableYUVTable */
-/****************************************************************************/
-typedef struct _ENABLE_YUV_PARAMETERS {
-	UCHAR ucEnable;		/*  ATOM_ENABLE:Enable YUV or ATOM_DISABLE:Disable YUV (RGB) */
-	UCHAR ucCRTC;		/*  Which CRTC needs this YUV or RGB format */
-	UCHAR ucPadding[2];
-} ENABLE_YUV_PARAMETERS;
+typedef struct _ADJUST_DISPLAY_PLL_INPUT_PARAMETERS_V3
+{
+	USHORT usPixelClock;                    // target pixel clock
+	UCHAR ucTransmitterID;                  // transmitter id defined in objectid.h
+	UCHAR ucEncodeMode;                     // encoder mode: CRT, LVDS, DP, TMDS or HDMI
+  UCHAR ucDispPllConfig;                 // display pll configure parameter defined as following DISPPLL_CONFIG_XXXX
+	UCHAR ucReserved[3];
+}ADJUST_DISPLAY_PLL_INPUT_PARAMETERS_V3;
+
+// usDispPllConfig v1.2 for RoadRunner
+#define DISPPLL_CONFIG_DVO_RATE_SEL                0x0001     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_DDR_SPEED               0x0000     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_SDR_SPEED               0x0001     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_OUTPUT_SEL              0x000c     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_LOW12BIT                0x0000     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_UPPER12BIT              0x0004     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_DVO_24BIT                   0x0008     // need only when ucTransmitterID = DVO
+#define DISPPLL_CONFIG_SS_ENABLE                   0x0010     // Only used when ucEncoderMode = DP or LVDS
+#define DISPPLL_CONFIG_COHERENT_MODE               0x0020     // Only used when ucEncoderMode = TMDS or HDMI
+#define DISPPLL_CONFIG_DUAL_LINK                   0x0040     // Only used when ucEncoderMode = TMDS or LVDS
+
+
+typedef struct _ADJUST_DISPLAY_PLL_OUTPUT_PARAMETERS_V3
+{
+  ULONG ulDispPllFreq;                 // return display PPLL freq which is used to generate the pixclock, and related idclk, symclk etc
+  UCHAR ucRefDiv;                      // if it is none-zero, it is used to be calculated the other ppll parameter fb_divider and post_div ( if it is not given )
+  UCHAR ucPostDiv;                     // if it is none-zero, it is used to be calculated the other ppll parameter fb_divider
+  UCHAR ucReserved[2];  
+}ADJUST_DISPLAY_PLL_OUTPUT_PARAMETERS_V3;
+
+typedef struct _ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3
+{
+  union 
+  {
+    ADJUST_DISPLAY_PLL_INPUT_PARAMETERS_V3  sInput;
+    ADJUST_DISPLAY_PLL_OUTPUT_PARAMETERS_V3 sOutput;
+  };
+} ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3;
+
+/****************************************************************************/	
+// Structures used by EnableYUVTable
+/****************************************************************************/	
+typedef struct _ENABLE_YUV_PARAMETERS
+{
+  UCHAR ucEnable;                     // ATOM_ENABLE:Enable YUV or ATOM_DISABLE:Disable YUV (RGB)
+  UCHAR ucCRTC;                       // Which CRTC needs this YUV or RGB format
+  UCHAR ucPadding[2];
+}ENABLE_YUV_PARAMETERS;
 #define ENABLE_YUV_PS_ALLOCATION ENABLE_YUV_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by GetMemoryClockTable */
-/****************************************************************************/
-typedef struct _GET_MEMORY_CLOCK_PARAMETERS {
-	ULONG ulReturnMemoryClock;	/*  current memory speed in 10KHz unit */
+/****************************************************************************/	
+// Structures used by GetMemoryClockTable
+/****************************************************************************/	
+typedef struct _GET_MEMORY_CLOCK_PARAMETERS
+{
+  ULONG ulReturnMemoryClock;          // current memory speed in 10KHz unit
 } GET_MEMORY_CLOCK_PARAMETERS;
 #define GET_MEMORY_CLOCK_PS_ALLOCATION  GET_MEMORY_CLOCK_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by GetEngineClockTable */
-/****************************************************************************/
-typedef struct _GET_ENGINE_CLOCK_PARAMETERS {
-	ULONG ulReturnEngineClock;	/*  current engine speed in 10KHz unit */
+/****************************************************************************/	
+// Structures used by GetEngineClockTable
+/****************************************************************************/	
+typedef struct _GET_ENGINE_CLOCK_PARAMETERS
+{
+  ULONG ulReturnEngineClock;          // current engine speed in 10KHz unit
 } GET_ENGINE_CLOCK_PARAMETERS;
 #define GET_ENGINE_CLOCK_PS_ALLOCATION  GET_ENGINE_CLOCK_PARAMETERS
 
-/****************************************************************************/
-/*  Following Structures and constant may be obsolete */
-/****************************************************************************/
-/* Maxium 8 bytes,the data read in will be placed in the parameter space. */
-/* Read operaion successeful when the paramter space is non-zero, otherwise read operation failed */
-typedef struct _READ_EDID_FROM_HW_I2C_DATA_PARAMETERS {
-	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
-	USHORT usVRAMAddress;	/* Adress in Frame Buffer where to pace raw EDID */
-	USHORT usStatus;	/* When use output: lower byte EDID checksum, high byte hardware status */
-	/* WHen use input:  lower byte as 'byte to read':currently limited to 128byte or 1byte */
-	UCHAR ucSlaveAddr;	/* Read from which slave */
-	UCHAR ucLineNumber;	/* Read from which HW assisted line */
-} READ_EDID_FROM_HW_I2C_DATA_PARAMETERS;
+/****************************************************************************/	
+// Following Structures and constant may be obsolete
+/****************************************************************************/	
+//Maxium 8 bytes,the data read in will be placed in the parameter space.
+//Read operaion successeful when the paramter space is non-zero, otherwise read operation failed
+typedef struct _READ_EDID_FROM_HW_I2C_DATA_PARAMETERS
+{
+  USHORT    usPrescale;         //Ratio between Engine clock and I2C clock
+  USHORT    usVRAMAddress;      //Adress in Frame Buffer where to pace raw EDID
+  USHORT    usStatus;           //When use output: lower byte EDID checksum, high byte hardware status
+                                //WHen use input:  lower byte as 'byte to read':currently limited to 128byte or 1byte
+  UCHAR     ucSlaveAddr;        //Read from which slave
+  UCHAR     ucLineNumber;       //Read from which HW assisted line
+}READ_EDID_FROM_HW_I2C_DATA_PARAMETERS;
 #define READ_EDID_FROM_HW_I2C_DATA_PS_ALLOCATION  READ_EDID_FROM_HW_I2C_DATA_PARAMETERS
 
+
 #define  ATOM_WRITE_I2C_FORMAT_PSOFFSET_PSDATABYTE                  0
 #define  ATOM_WRITE_I2C_FORMAT_PSOFFSET_PSTWODATABYTES              1
 #define  ATOM_WRITE_I2C_FORMAT_PSCOUNTER_PSOFFSET_IDDATABLOCK       2
 #define  ATOM_WRITE_I2C_FORMAT_PSCOUNTER_IDOFFSET_PLUS_IDDATABLOCK  3
 #define  ATOM_WRITE_I2C_FORMAT_IDCOUNTER_IDOFFSET_IDDATABLOCK       4
 
-typedef struct _WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS {
-	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
-	USHORT usByteOffset;	/* Write to which byte */
-	/* Upper portion of usByteOffset is Format of data */
-	/* 1bytePS+offsetPS */
-	/* 2bytesPS+offsetPS */
-	/* blockID+offsetPS */
-	/* blockID+offsetID */
-	/* blockID+counterID+offsetID */
-	UCHAR ucData;		/* PS data1 */
-	UCHAR ucStatus;		/* Status byte 1=success, 2=failure, Also is used as PS data2 */
-	UCHAR ucSlaveAddr;	/* Write to which slave */
-	UCHAR ucLineNumber;	/* Write from which HW assisted line */
-} WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS;
+typedef struct _WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
+{
+  USHORT    usPrescale;         //Ratio between Engine clock and I2C clock
+  USHORT    usByteOffset;       //Write to which byte
+                                //Upper portion of usByteOffset is Format of data 
+                                //1bytePS+offsetPS
+                                //2bytesPS+offsetPS
+                                //blockID+offsetPS
+                                //blockID+offsetID
+                                //blockID+counterID+offsetID
+  UCHAR     ucData;             //PS data1
+  UCHAR     ucStatus;           //Status byte 1=success, 2=failure, Also is used as PS data2
+  UCHAR     ucSlaveAddr;        //Write to which slave
+  UCHAR     ucLineNumber;       //Write from which HW assisted line
+}WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS;
 
 #define WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION  WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
 
-typedef struct _SET_UP_HW_I2C_DATA_PARAMETERS {
-	USHORT usPrescale;	/* Ratio between Engine clock and I2C clock */
-	UCHAR ucSlaveAddr;	/* Write to which slave */
-	UCHAR ucLineNumber;	/* Write from which HW assisted line */
-} SET_UP_HW_I2C_DATA_PARAMETERS;
+typedef struct _SET_UP_HW_I2C_DATA_PARAMETERS
+{
+  USHORT    usPrescale;         //Ratio between Engine clock and I2C clock
+  UCHAR     ucSlaveAddr;        //Write to which slave
+  UCHAR     ucLineNumber;       //Write from which HW assisted line
+}SET_UP_HW_I2C_DATA_PARAMETERS;
+
 
 /**************************************************************************/
 #define SPEED_FAN_CONTROL_PS_ALLOCATION   WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
 
-/****************************************************************************/
-/*  Structures used by PowerConnectorDetectionTable */
-/****************************************************************************/
-typedef struct _POWER_CONNECTOR_DETECTION_PARAMETERS {
-	UCHAR ucPowerConnectorStatus;	/* Used for return value 0: detected, 1:not detected */
-	UCHAR ucPwrBehaviorId;
-	USHORT usPwrBudget;	/* how much power currently boot to in unit of watt */
-} POWER_CONNECTOR_DETECTION_PARAMETERS;
+/****************************************************************************/	
+// Structures used by PowerConnectorDetectionTable
+/****************************************************************************/	
+typedef struct	_POWER_CONNECTOR_DETECTION_PARAMETERS
+{
+  UCHAR   ucPowerConnectorStatus;      //Used for return value 0: detected, 1:not detected
+	UCHAR   ucPwrBehaviorId;							
+	USHORT	usPwrBudget;								 //how much power currently boot to in unit of watt
+}POWER_CONNECTOR_DETECTION_PARAMETERS;
 
-typedef struct POWER_CONNECTOR_DETECTION_PS_ALLOCATION {
-	UCHAR ucPowerConnectorStatus;	/* Used for return value 0: detected, 1:not detected */
-	UCHAR ucReserved;
-	USHORT usPwrBudget;	/* how much power currently boot to in unit of watt */
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
-} POWER_CONNECTOR_DETECTION_PS_ALLOCATION;
+typedef struct POWER_CONNECTOR_DETECTION_PS_ALLOCATION
+{                               
+  UCHAR   ucPowerConnectorStatus;      //Used for return value 0: detected, 1:not detected
+	UCHAR   ucReserved;
+	USHORT	usPwrBudget;								 //how much power currently boot to in unit of watt
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION    sReserved;
+}POWER_CONNECTOR_DETECTION_PS_ALLOCATION;
 
 /****************************LVDS SS Command Table Definitions**********************/
 
-/****************************************************************************/
-/*  Structures used by EnableSpreadSpectrumOnPPLLTable */
-/****************************************************************************/
-typedef struct _ENABLE_LVDS_SS_PARAMETERS {
-	USHORT usSpreadSpectrumPercentage;
-	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
-	UCHAR ucSpreadSpectrumStepSize_Delay;	/* bits3:2 SS_STEP_SIZE; bit 6:4 SS_DELAY */
-	UCHAR ucEnable;		/* ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucPadding[3];
-} ENABLE_LVDS_SS_PARAMETERS;
+/****************************************************************************/	
+// Structures used by EnableSpreadSpectrumOnPPLLTable
+/****************************************************************************/	
+typedef struct	_ENABLE_LVDS_SS_PARAMETERS
+{
+  USHORT  usSpreadSpectrumPercentage;       
+  UCHAR   ucSpreadSpectrumType;           //Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD
+  UCHAR   ucSpreadSpectrumStepSize_Delay; //bits3:2 SS_STEP_SIZE; bit 6:4 SS_DELAY
+  UCHAR   ucEnable;                       //ATOM_ENABLE or ATOM_DISABLE
+  UCHAR   ucPadding[3];
+}ENABLE_LVDS_SS_PARAMETERS;
 
-/* ucTableFormatRevision=1,ucTableContentRevision=2 */
-typedef struct _ENABLE_LVDS_SS_PARAMETERS_V2 {
-	USHORT usSpreadSpectrumPercentage;
-	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
-	UCHAR ucSpreadSpectrumStep;	/*  */
-	UCHAR ucEnable;		/* ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucSpreadSpectrumDelay;
-	UCHAR ucSpreadSpectrumRange;
-	UCHAR ucPadding;
-} ENABLE_LVDS_SS_PARAMETERS_V2;
+//ucTableFormatRevision=1,ucTableContentRevision=2
+typedef struct	_ENABLE_LVDS_SS_PARAMETERS_V2
+{
+  USHORT  usSpreadSpectrumPercentage;       
+  UCHAR   ucSpreadSpectrumType;           //Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD
+  UCHAR   ucSpreadSpectrumStep;           //
+  UCHAR   ucEnable;                       //ATOM_ENABLE or ATOM_DISABLE
+  UCHAR   ucSpreadSpectrumDelay;
+  UCHAR   ucSpreadSpectrumRange;
+  UCHAR   ucPadding;
+}ENABLE_LVDS_SS_PARAMETERS_V2;
 
-/* This new structure is based on ENABLE_LVDS_SS_PARAMETERS but expands to SS on PPLL, so other devices can use SS. */
-typedef struct _ENABLE_SPREAD_SPECTRUM_ON_PPLL {
-	USHORT usSpreadSpectrumPercentage;
-	UCHAR ucSpreadSpectrumType;	/*  Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
-	UCHAR ucSpreadSpectrumStep;	/*  */
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucSpreadSpectrumDelay;
-	UCHAR ucSpreadSpectrumRange;
-	UCHAR ucPpll;		/*  ATOM_PPLL1/ATOM_PPLL2 */
-} ENABLE_SPREAD_SPECTRUM_ON_PPLL;
+//This new structure is based on ENABLE_LVDS_SS_PARAMETERS but expands to SS on PPLL, so other devices can use SS.
+typedef struct	_ENABLE_SPREAD_SPECTRUM_ON_PPLL
+{
+  USHORT  usSpreadSpectrumPercentage;
+  UCHAR   ucSpreadSpectrumType;           // Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD
+  UCHAR   ucSpreadSpectrumStep;           //
+  UCHAR   ucEnable;                       // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR   ucSpreadSpectrumDelay;
+  UCHAR   ucSpreadSpectrumRange;
+  UCHAR   ucPpll;												  // ATOM_PPLL1/ATOM_PPLL2
+}ENABLE_SPREAD_SPECTRUM_ON_PPLL;
+
+typedef struct _ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2
+{
+  USHORT  usSpreadSpectrumPercentage;
+  UCHAR   ucSpreadSpectrumType;	        // Bit[0]: 0-Down Spread,1-Center Spread. 
+                                        // Bit[1]: 1-Ext. 0-Int. 
+                                        // Bit[3:2]: =0 P1PLL =1 P2PLL =2 DCPLL
+                                        // Bits[7:4] reserved
+  UCHAR   ucEnable;	                    // ATOM_ENABLE or ATOM_DISABLE
+  USHORT  usSpreadSpectrumAmount;      	// Includes SS_AMOUNT_FBDIV[7:0] and SS_AMOUNT_NFRAC_SLIP[11:8]    
+  USHORT  usSpreadSpectrumStep;	        // SS_STEP_SIZE_DSFRAC
+}ENABLE_SPREAD_SPECTRUM_ON_PPLL_V2;
+
+#define ATOM_PPLL_SS_TYPE_V2_DOWN_SPREAD      0x00
+#define ATOM_PPLL_SS_TYPE_V2_CENTRE_SPREAD    0x01
+#define ATOM_PPLL_SS_TYPE_V2_EXT_SPREAD       0x02
+#define ATOM_PPLL_SS_TYPE_V2_PPLL_SEL_MASK    0x0c
+#define ATOM_PPLL_SS_TYPE_V2_P1PLL            0x00
+#define ATOM_PPLL_SS_TYPE_V2_P2PLL            0x04
+#define ATOM_PPLL_SS_TYPE_V2_DCPLL            0x08
+#define ATOM_PPLL_SS_AMOUNT_V2_FBDIV_MASK     0x00FF
+#define ATOM_PPLL_SS_AMOUNT_V2_FBDIV_SHIFT    0
+#define ATOM_PPLL_SS_AMOUNT_V2_NFRAC_MASK     0x0F00
+#define ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT    8
 
 #define ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION  ENABLE_SPREAD_SPECTRUM_ON_PPLL
 
 /**************************************************************************/
 
-typedef struct _SET_PIXEL_CLOCK_PS_ALLOCATION {
-	PIXEL_CLOCK_PARAMETERS sPCLKInput;
-	ENABLE_SPREAD_SPECTRUM_ON_PPLL sReserved;	/* Caller doesn't need to init this portion */
-} SET_PIXEL_CLOCK_PS_ALLOCATION;
+typedef struct _SET_PIXEL_CLOCK_PS_ALLOCATION
+{
+  PIXEL_CLOCK_PARAMETERS sPCLKInput;
+  ENABLE_SPREAD_SPECTRUM_ON_PPLL sReserved;//Caller doesn't need to init this portion 
+}SET_PIXEL_CLOCK_PS_ALLOCATION;
 
 #define ENABLE_VGA_RENDER_PS_ALLOCATION   SET_PIXEL_CLOCK_PS_ALLOCATION
 
-/****************************************************************************/
-/*  Structures used by ### */
-/****************************************************************************/
-typedef struct _MEMORY_TRAINING_PARAMETERS {
-	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
-} MEMORY_TRAINING_PARAMETERS;
+/****************************************************************************/	
+// Structures used by ###
+/****************************************************************************/	
+typedef struct	_MEMORY_TRAINING_PARAMETERS
+{
+  ULONG ulTargetMemoryClock;          //In 10Khz unit
+}MEMORY_TRAINING_PARAMETERS;
 #define MEMORY_TRAINING_PS_ALLOCATION MEMORY_TRAINING_PARAMETERS
 
+
 /****************************LVDS and other encoder command table definitions **********************/
 
-/****************************************************************************/
-/*  Structures used by LVDSEncoderControlTable   (Before DCE30) */
-/*                     LVTMAEncoderControlTable  (Before DCE30) */
-/*                     TMDSAEncoderControlTable  (Before DCE30) */
-/****************************************************************************/
-typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucMisc;		/*  bit0=0: Enable single link */
-	/*      =1: Enable dual link */
-	/*  Bit1=0: 666RGB */
-	/*      =1: 888RGB */
-	UCHAR ucAction;		/*  0: turn off encoder */
-	/*  1: setup and turn on encoder */
-} LVDS_ENCODER_CONTROL_PARAMETERS;
+
+/****************************************************************************/	
+// Structures used by LVDSEncoderControlTable   (Before DCE30)
+//                    LVTMAEncoderControlTable  (Before DCE30)
+//                    TMDSAEncoderControlTable  (Before DCE30)
+/****************************************************************************/	
+typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS
+{
+  USHORT usPixelClock;  // in 10KHz; for bios convenient
+  UCHAR  ucMisc;        // bit0=0: Enable single link
+                        //     =1: Enable dual link
+                        // Bit1=0: 666RGB
+                        //     =1: 888RGB
+  UCHAR  ucAction;      // 0: turn off encoder
+                        // 1: setup and turn on encoder
+}LVDS_ENCODER_CONTROL_PARAMETERS;
 
 #define LVDS_ENCODER_CONTROL_PS_ALLOCATION  LVDS_ENCODER_CONTROL_PARAMETERS
-
+   
 #define TMDS1_ENCODER_CONTROL_PARAMETERS    LVDS_ENCODER_CONTROL_PARAMETERS
 #define TMDS1_ENCODER_CONTROL_PS_ALLOCATION TMDS1_ENCODER_CONTROL_PARAMETERS
 
 #define TMDS2_ENCODER_CONTROL_PARAMETERS    TMDS1_ENCODER_CONTROL_PARAMETERS
 #define TMDS2_ENCODER_CONTROL_PS_ALLOCATION TMDS2_ENCODER_CONTROL_PARAMETERS
 
-/* ucTableFormatRevision=1,ucTableContentRevision=2 */
-typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2 {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucMisc;		/*  see PANEL_ENCODER_MISC_xx definitions below */
-	UCHAR ucAction;		/*  0: turn off encoder */
-	/*  1: setup and turn on encoder */
-	UCHAR ucTruncate;	/*  bit0=0: Disable truncate */
-	/*      =1: Enable truncate */
-	/*  bit4=0: 666RGB */
-	/*      =1: 888RGB */
-	UCHAR ucSpatial;	/*  bit0=0: Disable spatial dithering */
-	/*      =1: Enable spatial dithering */
-	/*  bit4=0: 666RGB */
-	/*      =1: 888RGB */
-	UCHAR ucTemporal;	/*  bit0=0: Disable temporal dithering */
-	/*      =1: Enable temporal dithering */
-	/*  bit4=0: 666RGB */
-	/*      =1: 888RGB */
-	/*  bit5=0: Gray level 2 */
-	/*      =1: Gray level 4 */
-	UCHAR ucFRC;		/*  bit4=0: 25FRC_SEL pattern E */
-	/*      =1: 25FRC_SEL pattern F */
-	/*  bit6:5=0: 50FRC_SEL pattern A */
-	/*        =1: 50FRC_SEL pattern B */
-	/*        =2: 50FRC_SEL pattern C */
-	/*        =3: 50FRC_SEL pattern D */
-	/*  bit7=0: 75FRC_SEL pattern E */
-	/*      =1: 75FRC_SEL pattern F */
-} LVDS_ENCODER_CONTROL_PARAMETERS_V2;
+
+//ucTableFormatRevision=1,ucTableContentRevision=2
+typedef struct _LVDS_ENCODER_CONTROL_PARAMETERS_V2
+{
+  USHORT usPixelClock;  // in 10KHz; for bios convenient
+  UCHAR  ucMisc;        // see PANEL_ENCODER_MISC_xx defintions below
+  UCHAR  ucAction;      // 0: turn off encoder
+                        // 1: setup and turn on encoder
+  UCHAR  ucTruncate;    // bit0=0: Disable truncate
+                        //     =1: Enable truncate
+                        // bit4=0: 666RGB
+                        //     =1: 888RGB
+  UCHAR  ucSpatial;     // bit0=0: Disable spatial dithering
+                        //     =1: Enable spatial dithering
+                        // bit4=0: 666RGB
+                        //     =1: 888RGB
+  UCHAR  ucTemporal;    // bit0=0: Disable temporal dithering
+                        //     =1: Enable temporal dithering
+                        // bit4=0: 666RGB
+                        //     =1: 888RGB
+                        // bit5=0: Gray level 2
+                        //     =1: Gray level 4
+  UCHAR  ucFRC;         // bit4=0: 25FRC_SEL pattern E
+                        //     =1: 25FRC_SEL pattern F
+                        // bit6:5=0: 50FRC_SEL pattern A
+                        //       =1: 50FRC_SEL pattern B
+                        //       =2: 50FRC_SEL pattern C
+                        //       =3: 50FRC_SEL pattern D
+                        // bit7=0: 75FRC_SEL pattern E
+                        //     =1: 75FRC_SEL pattern F
+}LVDS_ENCODER_CONTROL_PARAMETERS_V2;
 
 #define LVDS_ENCODER_CONTROL_PS_ALLOCATION_V2  LVDS_ENCODER_CONTROL_PARAMETERS_V2
-
+   
 #define TMDS1_ENCODER_CONTROL_PARAMETERS_V2    LVDS_ENCODER_CONTROL_PARAMETERS_V2
 #define TMDS1_ENCODER_CONTROL_PS_ALLOCATION_V2 TMDS1_ENCODER_CONTROL_PARAMETERS_V2
-
+  
 #define TMDS2_ENCODER_CONTROL_PARAMETERS_V2    TMDS1_ENCODER_CONTROL_PARAMETERS_V2
 #define TMDS2_ENCODER_CONTROL_PS_ALLOCATION_V2 TMDS2_ENCODER_CONTROL_PARAMETERS_V2
 
@@ -1185,38 +1536,42 @@
 #define TMDS2_ENCODER_CONTROL_PARAMETERS_V3    LVDS_ENCODER_CONTROL_PARAMETERS_V3
 #define TMDS2_ENCODER_CONTROL_PS_ALLOCATION_V3 TMDS2_ENCODER_CONTROL_PARAMETERS_V3
 
-/****************************************************************************/
-/*  Structures used by ### */
-/****************************************************************************/
-typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS {
-	UCHAR ucEnable;		/*  Enable or Disable External TMDS encoder */
-	UCHAR ucMisc;		/*  Bit0=0:Enable Single link;=1:Enable Dual link;Bit1 {=0:666RGB, =1:888RGB} */
-	UCHAR ucPadding[2];
-} ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS;
+/****************************************************************************/	
+// Structures used by ###
+/****************************************************************************/	
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS
+{                               
+  UCHAR    ucEnable;            // Enable or Disable External TMDS encoder
+  UCHAR    ucMisc;              // Bit0=0:Enable Single link;=1:Enable Dual link;Bit1 {=0:666RGB, =1:888RGB}
+  UCHAR    ucPadding[2];
+}ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS;
 
-typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION {
-	ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS sXTmdsEncoder;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
-} ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION;
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION
+{                               
+  ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS    sXTmdsEncoder;
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION   sReserved;     //Caller doesn't need to init this portion
+}ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION;
 
 #define ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS_V2  LVDS_ENCODER_CONTROL_PARAMETERS_V2
 
-typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2 {
-	ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS_V2 sXTmdsEncoder;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
-} ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2;
+typedef struct _ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2
+{                               
+  ENABLE_EXTERNAL_TMDS_ENCODER_PARAMETERS_V2    sXTmdsEncoder;
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION      sReserved;     //Caller doesn't need to init this portion
+}ENABLE_EXTERNAL_TMDS_ENCODER_PS_ALLOCATION_V2;
 
-typedef struct _EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION {
-	DIG_ENCODER_CONTROL_PARAMETERS sDigEncoder;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
-} EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION;
+typedef struct _EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION
+{
+  DIG_ENCODER_CONTROL_PARAMETERS            sDigEncoder;
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
+}EXTERNAL_ENCODER_CONTROL_PS_ALLOCATION;
 
-/****************************************************************************/
-/*  Structures used by DVOEncoderControlTable */
-/****************************************************************************/
-/* ucTableFormatRevision=1,ucTableContentRevision=3 */
+/****************************************************************************/	
+// Structures used by DVOEncoderControlTable
+/****************************************************************************/	
+//ucTableFormatRevision=1,ucTableContentRevision=3
 
-/* ucDVOConfig: */
+//ucDVOConfig:
 #define DVO_ENCODER_CONFIG_RATE_SEL							0x01
 #define DVO_ENCODER_CONFIG_DDR_SPEED						0x00
 #define DVO_ENCODER_CONFIG_SDR_SPEED						0x01
@@ -1225,21 +1580,22 @@
 #define DVO_ENCODER_CONFIG_UPPER12BIT						0x04
 #define DVO_ENCODER_CONFIG_24BIT								0x08
 
-typedef struct _DVO_ENCODER_CONTROL_PARAMETERS_V3 {
-	USHORT usPixelClock;
-	UCHAR ucDVOConfig;
-	UCHAR ucAction;		/* ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT */
-	UCHAR ucReseved[4];
-} DVO_ENCODER_CONTROL_PARAMETERS_V3;
+typedef struct _DVO_ENCODER_CONTROL_PARAMETERS_V3
+{
+  USHORT usPixelClock; 
+  UCHAR  ucDVOConfig;
+  UCHAR  ucAction;														//ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT
+  UCHAR  ucReseved[4];
+}DVO_ENCODER_CONTROL_PARAMETERS_V3;
 #define DVO_ENCODER_CONTROL_PS_ALLOCATION_V3	DVO_ENCODER_CONTROL_PARAMETERS_V3
 
-/* ucTableFormatRevision=1 */
-/* ucTableContentRevision=3 structure is not changed but usMisc add bit 1 as another input for */
-/*  bit1=0: non-coherent mode */
-/*      =1: coherent mode */
+//ucTableFormatRevision=1
+//ucTableContentRevision=3 structure is not changed but usMisc add bit 1 as another input for 
+// bit1=0: non-coherent mode
+//     =1: coherent mode
 
-/* ========================================================================================== */
-/* Only change is here next time when changing encoder parameter definitions again! */
+//==========================================================================================
+//Only change is here next time when changing encoder parameter definitions again!
 #define LVDS_ENCODER_CONTROL_PARAMETERS_LAST     LVDS_ENCODER_CONTROL_PARAMETERS_V3
 #define LVDS_ENCODER_CONTROL_PS_ALLOCATION_LAST  LVDS_ENCODER_CONTROL_PARAMETERS_LAST
 
@@ -1252,7 +1608,7 @@
 #define DVO_ENCODER_CONTROL_PARAMETERS_LAST      DVO_ENCODER_CONTROL_PARAMETERS
 #define DVO_ENCODER_CONTROL_PS_ALLOCATION_LAST   DVO_ENCODER_CONTROL_PS_ALLOCATION
 
-/* ========================================================================================== */
+//==========================================================================================
 #define PANEL_ENCODER_MISC_DUAL                0x01
 #define PANEL_ENCODER_MISC_COHERENT            0x02
 #define	PANEL_ENCODER_MISC_TMDS_LINKB					 0x04
@@ -1281,159 +1637,159 @@
 #define PANEL_ENCODER_75FRC_E                  0x00
 #define PANEL_ENCODER_75FRC_F                  0x80
 
-/****************************************************************************/
-/*  Structures used by SetVoltageTable */
-/****************************************************************************/
+/****************************************************************************/	
+// Structures used by SetVoltageTable
+/****************************************************************************/	
 #define SET_VOLTAGE_TYPE_ASIC_VDDC             1
 #define SET_VOLTAGE_TYPE_ASIC_MVDDC            2
 #define SET_VOLTAGE_TYPE_ASIC_MVDDQ            3
 #define SET_VOLTAGE_TYPE_ASIC_VDDCI            4
 #define SET_VOLTAGE_INIT_MODE                  5
-#define SET_VOLTAGE_GET_MAX_VOLTAGE            6	/* Gets the Max. voltage for the soldered Asic */
+#define SET_VOLTAGE_GET_MAX_VOLTAGE            6					//Gets the Max. voltage for the soldered Asic
 
 #define SET_ASIC_VOLTAGE_MODE_ALL_SOURCE       0x1
 #define SET_ASIC_VOLTAGE_MODE_SOURCE_A         0x2
 #define SET_ASIC_VOLTAGE_MODE_SOURCE_B         0x4
 
 #define	SET_ASIC_VOLTAGE_MODE_SET_VOLTAGE      0x0
-#define	SET_ASIC_VOLTAGE_MODE_GET_GPIOVAL      0x1
+#define	SET_ASIC_VOLTAGE_MODE_GET_GPIOVAL      0x1	
 #define	SET_ASIC_VOLTAGE_MODE_GET_GPIOMASK     0x2
 
-typedef struct _SET_VOLTAGE_PARAMETERS {
-	UCHAR ucVoltageType;	/*  To tell which voltage to set up, VDDC/MVDDC/MVDDQ */
-	UCHAR ucVoltageMode;	/*  To set all, to set source A or source B or ... */
-	UCHAR ucVoltageIndex;	/*  An index to tell which voltage level */
-	UCHAR ucReserved;
-} SET_VOLTAGE_PARAMETERS;
+typedef struct	_SET_VOLTAGE_PARAMETERS
+{
+  UCHAR    ucVoltageType;               // To tell which voltage to set up, VDDC/MVDDC/MVDDQ
+  UCHAR    ucVoltageMode;               // To set all, to set source A or source B or ...
+  UCHAR    ucVoltageIndex;              // An index to tell which voltage level
+  UCHAR    ucReserved;          
+}SET_VOLTAGE_PARAMETERS;
 
-typedef struct _SET_VOLTAGE_PARAMETERS_V2 {
-	UCHAR ucVoltageType;	/*  To tell which voltage to set up, VDDC/MVDDC/MVDDQ */
-	UCHAR ucVoltageMode;	/*  Not used, maybe use for state machine for differen power mode */
-	USHORT usVoltageLevel;	/*  real voltage level */
-} SET_VOLTAGE_PARAMETERS_V2;
+typedef struct	_SET_VOLTAGE_PARAMETERS_V2
+{
+  UCHAR    ucVoltageType;               // To tell which voltage to set up, VDDC/MVDDC/MVDDQ
+  UCHAR    ucVoltageMode;               // Not used, maybe use for state machine for differen power mode
+  USHORT   usVoltageLevel;              // real voltage level
+}SET_VOLTAGE_PARAMETERS_V2;
 
-typedef struct _SET_VOLTAGE_PS_ALLOCATION {
-	SET_VOLTAGE_PARAMETERS sASICSetVoltage;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
-} SET_VOLTAGE_PS_ALLOCATION;
+typedef struct _SET_VOLTAGE_PS_ALLOCATION
+{
+  SET_VOLTAGE_PARAMETERS sASICSetVoltage;
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;
+}SET_VOLTAGE_PS_ALLOCATION;
 
-/****************************************************************************/
-/*  Structures used by TVEncoderControlTable */
-/****************************************************************************/
-typedef struct _TV_ENCODER_CONTROL_PARAMETERS {
-	USHORT usPixelClock;	/*  in 10KHz; for bios convenient */
-	UCHAR ucTvStandard;	/*  See definition "ATOM_TV_NTSC ..." */
-	UCHAR ucAction;		/*  0: turn off encoder */
-	/*  1: setup and turn on encoder */
-} TV_ENCODER_CONTROL_PARAMETERS;
+/****************************************************************************/	
+// Structures used by TVEncoderControlTable
+/****************************************************************************/	
+typedef struct _TV_ENCODER_CONTROL_PARAMETERS
+{
+  USHORT usPixelClock;                // in 10KHz; for bios convenient
+  UCHAR  ucTvStandard;                // See definition "ATOM_TV_NTSC ..."
+  UCHAR  ucAction;                    // 0: turn off encoder
+                                      // 1: setup and turn on encoder
+}TV_ENCODER_CONTROL_PARAMETERS;
 
-typedef struct _TV_ENCODER_CONTROL_PS_ALLOCATION {
-	TV_ENCODER_CONTROL_PARAMETERS sTVEncoder;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/*  Don't set this one */
-} TV_ENCODER_CONTROL_PS_ALLOCATION;
+typedef struct _TV_ENCODER_CONTROL_PS_ALLOCATION
+{
+  TV_ENCODER_CONTROL_PARAMETERS sTVEncoder;          
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION    sReserved; // Don't set this one
+}TV_ENCODER_CONTROL_PS_ALLOCATION;
 
-/* ==============================Data Table Portion==================================== */
+//==============================Data Table Portion====================================
 
-#ifdef	UEFI_BUILD
-#define	UTEMP	USHORT
-#define	USHORT	void*
-#endif
+/****************************************************************************/	
+// Structure used in Data.mtb
+/****************************************************************************/	
+typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES
+{
+  USHORT        UtilityPipeLine;	        // Offest for the utility to get parser info,Don't change this position!
+  USHORT        MultimediaCapabilityInfo; // Only used by MM Lib,latest version 1.1, not configuable from Bios, need to include the table to build Bios 
+  USHORT        MultimediaConfigInfo;     // Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios
+  USHORT        StandardVESA_Timing;      // Only used by Bios
+  USHORT        FirmwareInfo;             // Shared by various SW components,latest version 1.4
+  USHORT        DAC_Info;                 // Will be obsolete from R600
+  USHORT        LVDS_Info;                // Shared by various SW components,latest version 1.1 
+  USHORT        TMDS_Info;                // Will be obsolete from R600
+  USHORT        AnalogTV_Info;            // Shared by various SW components,latest version 1.1 
+  USHORT        SupportedDevicesInfo;     // Will be obsolete from R600
+  USHORT        GPIO_I2C_Info;            // Shared by various SW components,latest version 1.2 will be used from R600           
+  USHORT        VRAM_UsageByFirmware;     // Shared by various SW components,latest version 1.3 will be used from R600
+  USHORT        GPIO_Pin_LUT;             // Shared by various SW components,latest version 1.1
+  USHORT        VESA_ToInternalModeLUT;   // Only used by Bios
+  USHORT        ComponentVideoInfo;       // Shared by various SW components,latest version 2.1 will be used from R600
+  USHORT        PowerPlayInfo;            // Shared by various SW components,latest version 2.1,new design from R600
+  USHORT        CompassionateData;        // Will be obsolete from R600
+  USHORT        SaveRestoreInfo;          // Only used by Bios
+  USHORT        PPLL_SS_Info;             // Shared by various SW components,latest version 1.2, used to call SS_Info, change to new name because of int ASIC SS info
+  USHORT        OemInfo;                  // Defined and used by external SW, should be obsolete soon
+  USHORT        XTMDS_Info;               // Will be obsolete from R600
+  USHORT        MclkSS_Info;              // Shared by various SW components,latest version 1.1, only enabled when ext SS chip is used
+  USHORT        Object_Header;            // Shared by various SW components,latest version 1.1
+  USHORT        IndirectIOAccess;         // Only used by Bios,this table position can't change at all!!
+  USHORT        MC_InitParameter;         // Only used by command table
+  USHORT        ASIC_VDDC_Info;						// Will be obsolete from R600
+  USHORT        ASIC_InternalSS_Info;			// New tabel name from R600, used to be called "ASIC_MVDDC_Info"
+  USHORT        TV_VideoMode;							// Only used by command table
+  USHORT        VRAM_Info;								// Only used by command table, latest version 1.3
+  USHORT        MemoryTrainingInfo;				// Used for VBIOS and Diag utility for memory training purpose since R600. the new table rev start from 2.1
+  USHORT        IntegratedSystemInfo;			// Shared by various SW components
+  USHORT        ASIC_ProfilingInfo;				// New table name from R600, used to be called "ASIC_VDDCI_Info" for pre-R600
+  USHORT        VoltageObjectInfo;				// Shared by various SW components, latest version 1.1
+	USHORT				PowerSourceInfo;					// Shared by various SW components, latest versoin 1.1
+}ATOM_MASTER_LIST_OF_DATA_TABLES;
 
-/****************************************************************************/
-/*  Structure used in Data.mtb */
-/****************************************************************************/
-typedef struct _ATOM_MASTER_LIST_OF_DATA_TABLES {
-	USHORT UtilityPipeLine;	/*  Offest for the utility to get parser info,Don't change this position! */
-	USHORT MultimediaCapabilityInfo;	/*  Only used by MM Lib,latest version 1.1, not configuable from Bios, need to include the table to build Bios */
-	USHORT MultimediaConfigInfo;	/*  Only used by MM Lib,latest version 2.1, not configuable from Bios, need to include the table to build Bios */
-	USHORT StandardVESA_Timing;	/*  Only used by Bios */
-	USHORT FirmwareInfo;	/*  Shared by various SW components,latest version 1.4 */
-	USHORT DAC_Info;	/*  Will be obsolete from R600 */
-	USHORT LVDS_Info;	/*  Shared by various SW components,latest version 1.1 */
-	USHORT TMDS_Info;	/*  Will be obsolete from R600 */
-	USHORT AnalogTV_Info;	/*  Shared by various SW components,latest version 1.1 */
-	USHORT SupportedDevicesInfo;	/*  Will be obsolete from R600 */
-	USHORT GPIO_I2C_Info;	/*  Shared by various SW components,latest version 1.2 will be used from R600 */
-	USHORT VRAM_UsageByFirmware;	/*  Shared by various SW components,latest version 1.3 will be used from R600 */
-	USHORT GPIO_Pin_LUT;	/*  Shared by various SW components,latest version 1.1 */
-	USHORT VESA_ToInternalModeLUT;	/*  Only used by Bios */
-	USHORT ComponentVideoInfo;	/*  Shared by various SW components,latest version 2.1 will be used from R600 */
-	USHORT PowerPlayInfo;	/*  Shared by various SW components,latest version 2.1,new design from R600 */
-	USHORT CompassionateData;	/*  Will be obsolete from R600 */
-	USHORT SaveRestoreInfo;	/*  Only used by Bios */
-	USHORT PPLL_SS_Info;	/*  Shared by various SW components,latest version 1.2, used to call SS_Info, change to new name because of int ASIC SS info */
-	USHORT OemInfo;		/*  Defined and used by external SW, should be obsolete soon */
-	USHORT XTMDS_Info;	/*  Will be obsolete from R600 */
-	USHORT MclkSS_Info;	/*  Shared by various SW components,latest version 1.1, only enabled when ext SS chip is used */
-	USHORT Object_Header;	/*  Shared by various SW components,latest version 1.1 */
-	USHORT IndirectIOAccess;	/*  Only used by Bios,this table position can't change at all!! */
-	USHORT MC_InitParameter;	/*  Only used by command table */
-	USHORT ASIC_VDDC_Info;	/*  Will be obsolete from R600 */
-	USHORT ASIC_InternalSS_Info;	/*  New tabel name from R600, used to be called "ASIC_MVDDC_Info" */
-	USHORT TV_VideoMode;	/*  Only used by command table */
-	USHORT VRAM_Info;	/*  Only used by command table, latest version 1.3 */
-	USHORT MemoryTrainingInfo;	/*  Used for VBIOS and Diag utility for memory training purpose since R600. the new table rev start from 2.1 */
-	USHORT IntegratedSystemInfo;	/*  Shared by various SW components */
-	USHORT ASIC_ProfilingInfo;	/*  New table name from R600, used to be called "ASIC_VDDCI_Info" for pre-R600 */
-	USHORT VoltageObjectInfo;	/*  Shared by various SW components, latest version 1.1 */
-	USHORT PowerSourceInfo;	/*  Shared by various SW components, latest versoin 1.1 */
-} ATOM_MASTER_LIST_OF_DATA_TABLES;
+typedef struct _ATOM_MASTER_DATA_TABLE
+{ 
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_MASTER_LIST_OF_DATA_TABLES   ListOfDataTables;
+}ATOM_MASTER_DATA_TABLE;
 
-#ifdef	UEFI_BUILD
-#define	USHORT	UTEMP
-#endif
+/****************************************************************************/	
+// Structure used in MultimediaCapabilityInfoTable
+/****************************************************************************/	
+typedef struct _ATOM_MULTIMEDIA_CAPABILITY_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ULONG                    ulSignature;      // HW info table signature string "$ATI"
+  UCHAR                    ucI2C_Type;       // I2C type (normal GP_IO, ImpactTV GP_IO, Dedicated I2C pin, etc)
+  UCHAR                    ucTV_OutInfo;     // Type of TV out supported (3:0) and video out crystal frequency (6:4) and TV data port (7)
+  UCHAR                    ucVideoPortInfo;  // Provides the video port capabilities
+  UCHAR                    ucHostPortInfo;   // Provides host port configuration information
+}ATOM_MULTIMEDIA_CAPABILITY_INFO;
 
-typedef struct _ATOM_MASTER_DATA_TABLE {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_MASTER_LIST_OF_DATA_TABLES ListOfDataTables;
-} ATOM_MASTER_DATA_TABLE;
+/****************************************************************************/	
+// Structure used in MultimediaConfigInfoTable
+/****************************************************************************/	
+typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;
+  ULONG                    ulSignature;      // MM info table signature sting "$MMT"
+  UCHAR                    ucTunerInfo;      // Type of tuner installed on the adapter (4:0) and video input for tuner (7:5)
+  UCHAR                    ucAudioChipInfo;  // List the audio chip type (3:0) product type (4) and OEM revision (7:5)
+  UCHAR                    ucProductID;      // Defines as OEM ID or ATI board ID dependent on product type setting
+  UCHAR                    ucMiscInfo1;      // Tuner voltage (1:0) HW teletext support (3:2) FM audio decoder (5:4) reserved (6) audio scrambling (7)
+  UCHAR                    ucMiscInfo2;      // I2S input config (0) I2S output config (1) I2S Audio Chip (4:2) SPDIF Output Config (5) reserved (7:6)
+  UCHAR                    ucMiscInfo3;      // Video Decoder Type (3:0) Video In Standard/Crystal (7:4)
+  UCHAR                    ucMiscInfo4;      // Video Decoder Host Config (2:0) reserved (7:3)
+  UCHAR                    ucVideoInput0Info;// Video Input 0 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6)
+  UCHAR                    ucVideoInput1Info;// Video Input 1 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6)
+  UCHAR                    ucVideoInput2Info;// Video Input 2 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6)
+  UCHAR                    ucVideoInput3Info;// Video Input 3 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6)
+  UCHAR                    ucVideoInput4Info;// Video Input 4 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6)
+}ATOM_MULTIMEDIA_CONFIG_INFO;
 
-/****************************************************************************/
-/*  Structure used in MultimediaCapabilityInfoTable */
-/****************************************************************************/
-typedef struct _ATOM_MULTIMEDIA_CAPABILITY_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulSignature;	/*  HW info table signature string "$ATI" */
-	UCHAR ucI2C_Type;	/*  I2C type (normal GP_IO, ImpactTV GP_IO, Dedicated I2C pin, etc) */
-	UCHAR ucTV_OutInfo;	/*  Type of TV out supported (3:0) and video out crystal frequency (6:4) and TV data port (7) */
-	UCHAR ucVideoPortInfo;	/*  Provides the video port capabilities */
-	UCHAR ucHostPortInfo;	/*  Provides host port configuration information */
-} ATOM_MULTIMEDIA_CAPABILITY_INFO;
+/****************************************************************************/	
+// Structures used in FirmwareInfoTable
+/****************************************************************************/	
 
-/****************************************************************************/
-/*  Structure used in MultimediaConfigInfoTable */
-/****************************************************************************/
-typedef struct _ATOM_MULTIMEDIA_CONFIG_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulSignature;	/*  MM info table signature sting "$MMT" */
-	UCHAR ucTunerInfo;	/*  Type of tuner installed on the adapter (4:0) and video input for tuner (7:5) */
-	UCHAR ucAudioChipInfo;	/*  List the audio chip type (3:0) product type (4) and OEM revision (7:5) */
-	UCHAR ucProductID;	/*  Defines as OEM ID or ATI board ID dependent on product type setting */
-	UCHAR ucMiscInfo1;	/*  Tuner voltage (1:0) HW teletext support (3:2) FM audio decoder (5:4) reserved (6) audio scrambling (7) */
-	UCHAR ucMiscInfo2;	/*  I2S input config (0) I2S output config (1) I2S Audio Chip (4:2) SPDIF Output Config (5) reserved (7:6) */
-	UCHAR ucMiscInfo3;	/*  Video Decoder Type (3:0) Video In Standard/Crystal (7:4) */
-	UCHAR ucMiscInfo4;	/*  Video Decoder Host Config (2:0) reserved (7:3) */
-	UCHAR ucVideoInput0Info;	/*  Video Input 0 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
-	UCHAR ucVideoInput1Info;	/*  Video Input 1 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
-	UCHAR ucVideoInput2Info;	/*  Video Input 2 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
-	UCHAR ucVideoInput3Info;	/*  Video Input 3 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
-	UCHAR ucVideoInput4Info;	/*  Video Input 4 Type (1:0) F/B setting (2) physical connector ID (5:3) reserved (7:6) */
-} ATOM_MULTIMEDIA_CONFIG_INFO;
-
-/****************************************************************************/
-/*  Structures used in FirmwareInfoTable */
-/****************************************************************************/
-
-/*  usBIOSCapability Definition: */
-/*  Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; */
-/*  Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; */
-/*  Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; */
-/*  Others: Reserved */
+// usBIOSCapability Defintion:
+// Bit 0 = 0: Bios image is not Posted, =1:Bios image is Posted; 
+// Bit 1 = 0: Dual CRTC is not supported, =1: Dual CRTC is supported; 
+// Bit 2 = 0: Extended Desktop is not supported, =1: Extended Desktop is supported; 
+// Others: Reserved
 #define ATOM_BIOS_INFO_ATOM_FIRMWARE_POSTED         0x0001
 #define ATOM_BIOS_INFO_DUAL_CRTC_SUPPORT            0x0002
 #define ATOM_BIOS_INFO_EXTENDED_DESKTOP_SUPPORT     0x0004
-#define ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT      0x0008
-#define ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT      0x0010
+#define ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT      0x0008		// (valid from v1.1 ~v1.4):=1: memclk SS enable, =0 memclk SS disable. 
+#define ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT      0x0010		// (valid from v1.1 ~v1.4):=1: engclk SS enable, =0 engclk SS disable. 
 #define ATOM_BIOS_INFO_BL_CONTROLLED_BY_GPU         0x0020
 #define ATOM_BIOS_INFO_WMI_SUPPORT                  0x0040
 #define ATOM_BIOS_INFO_PPMODE_ASSIGNGED_BY_SYSTEM   0x0080
@@ -1441,242 +1797,292 @@
 #define ATOM_BIOS_INFO_HYPERMEMORY_SIZE_MASK        0x1E00
 #define ATOM_BIOS_INFO_VPOST_WITHOUT_FIRST_MODE_SET 0x2000
 #define ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE  0x4000
+#define ATOM_BIOS_INFO_MEMORY_CLOCK_EXT_SS_SUPPORT  0x0008		// (valid from v2.1 ): =1: memclk ss enable with external ss chip
+#define ATOM_BIOS_INFO_ENGINE_CLOCK_EXT_SS_SUPPORT  0x0010		// (valid from v2.1 ): =1: engclk ss enable with external ss chip
 
 #ifndef _H2INC
 
-/* Please don't add or expand this bitfield structure below, this one will retire soon.! */
-typedef struct _ATOM_FIRMWARE_CAPABILITY {
+//Please don't add or expand this bitfield structure below, this one will retire soon.!
+typedef struct _ATOM_FIRMWARE_CAPABILITY
+{
 #if ATOM_BIG_ENDIAN
-	USHORT Reserved:3;
-	USHORT HyperMemory_Size:4;
-	USHORT HyperMemory_Support:1;
-	USHORT PPMode_Assigned:1;
-	USHORT WMI_SUPPORT:1;
-	USHORT GPUControlsBL:1;
-	USHORT EngineClockSS_Support:1;
-	USHORT MemoryClockSS_Support:1;
-	USHORT ExtendedDesktopSupport:1;
-	USHORT DualCRTC_Support:1;
-	USHORT FirmwarePosted:1;
+  USHORT Reserved:3;
+  USHORT HyperMemory_Size:4;
+  USHORT HyperMemory_Support:1;
+  USHORT PPMode_Assigned:1;
+  USHORT WMI_SUPPORT:1;
+  USHORT GPUControlsBL:1;
+  USHORT EngineClockSS_Support:1;
+  USHORT MemoryClockSS_Support:1;
+  USHORT ExtendedDesktopSupport:1;
+  USHORT DualCRTC_Support:1;
+  USHORT FirmwarePosted:1;
 #else
-	USHORT FirmwarePosted:1;
-	USHORT DualCRTC_Support:1;
-	USHORT ExtendedDesktopSupport:1;
-	USHORT MemoryClockSS_Support:1;
-	USHORT EngineClockSS_Support:1;
-	USHORT GPUControlsBL:1;
-	USHORT WMI_SUPPORT:1;
-	USHORT PPMode_Assigned:1;
-	USHORT HyperMemory_Support:1;
-	USHORT HyperMemory_Size:4;
-	USHORT Reserved:3;
+  USHORT FirmwarePosted:1;
+  USHORT DualCRTC_Support:1;
+  USHORT ExtendedDesktopSupport:1;
+  USHORT MemoryClockSS_Support:1;
+  USHORT EngineClockSS_Support:1;
+  USHORT GPUControlsBL:1;
+  USHORT WMI_SUPPORT:1;
+  USHORT PPMode_Assigned:1;
+  USHORT HyperMemory_Support:1;
+  USHORT HyperMemory_Size:4;
+  USHORT Reserved:3;
 #endif
-} ATOM_FIRMWARE_CAPABILITY;
+}ATOM_FIRMWARE_CAPABILITY;
 
-typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS {
-	ATOM_FIRMWARE_CAPABILITY sbfAccess;
-	USHORT susAccess;
-} ATOM_FIRMWARE_CAPABILITY_ACCESS;
+typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS
+{
+  ATOM_FIRMWARE_CAPABILITY sbfAccess;
+  USHORT                   susAccess;
+}ATOM_FIRMWARE_CAPABILITY_ACCESS;
 
 #else
 
-typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS {
-	USHORT susAccess;
-} ATOM_FIRMWARE_CAPABILITY_ACCESS;
+typedef union _ATOM_FIRMWARE_CAPABILITY_ACCESS
+{
+  USHORT                   susAccess;
+}ATOM_FIRMWARE_CAPABILITY_ACCESS;
 
 #endif
 
-typedef struct _ATOM_FIRMWARE_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulFirmwareRevision;
-	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
-	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
-	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
-	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
-	UCHAR ucASICMaxTemperature;
-	UCHAR ucPadding[3];	/* Don't use them */
-	ULONG aulReservedForBIOS[3];	/* Don't use them */
-	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
-	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit, the definitions above can't change!!! */
-	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
-	USHORT usReferenceClock;	/* In 10Khz unit */
-	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
-	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
-	UCHAR ucDesign_ID;	/* Indicate what is the board design */
-	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
-} ATOM_FIRMWARE_INFO;
+typedef struct _ATOM_FIRMWARE_INFO
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG                           ulFirmwareRevision;
+  ULONG                           ulDefaultEngineClock;       //In 10Khz unit
+  ULONG                           ulDefaultMemoryClock;       //In 10Khz unit
+  ULONG                           ulDriverTargetEngineClock;  //In 10Khz unit
+  ULONG                           ulDriverTargetMemoryClock;  //In 10Khz unit
+  ULONG                           ulMaxEngineClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxMemoryClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxPixelClockPLL_Output;  //In 10Khz unit
+  ULONG                           ulASICMaxEngineClock;       //In 10Khz unit
+  ULONG                           ulASICMaxMemoryClock;       //In 10Khz unit
+  UCHAR                           ucASICMaxTemperature;
+  UCHAR                           ucPadding[3];               //Don't use them
+  ULONG                           aulReservedForBIOS[3];      //Don't use them
+  USHORT                          usMinEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Output; //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Output; //In 10Khz unit
+  USHORT                          usMaxPixelClock;            //In 10Khz unit, Max.  Pclk
+  USHORT                          usMinPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMaxPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMinPixelClockPLL_Output;  //In 10Khz unit, the definitions above can't change!!!
+  ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+  USHORT                          usReferenceClock;           //In 10Khz unit	
+  USHORT                          usPM_RTS_Location;          //RTS PM4 starting location in ROM in 1Kb unit 
+  UCHAR                           ucPM_RTS_StreamSize;        //RTS PM4 packets in Kb unit
+  UCHAR                           ucDesign_ID;                //Indicate what is the board design
+  UCHAR                           ucMemoryModule_ID;          //Indicate what is the board design
+}ATOM_FIRMWARE_INFO;
 
-typedef struct _ATOM_FIRMWARE_INFO_V1_2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulFirmwareRevision;
-	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
-	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
-	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
-	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
-	UCHAR ucASICMaxTemperature;
-	UCHAR ucMinAllowedBL_Level;
-	UCHAR ucPadding[2];	/* Don't use them */
-	ULONG aulReservedForBIOS[2];	/* Don't use them */
-	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
-	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
-	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
-	USHORT usReferenceClock;	/* In 10Khz unit */
-	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
-	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
-	UCHAR ucDesign_ID;	/* Indicate what is the board design */
-	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
-} ATOM_FIRMWARE_INFO_V1_2;
+typedef struct _ATOM_FIRMWARE_INFO_V1_2
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG                           ulFirmwareRevision;
+  ULONG                           ulDefaultEngineClock;       //In 10Khz unit
+  ULONG                           ulDefaultMemoryClock;       //In 10Khz unit
+  ULONG                           ulDriverTargetEngineClock;  //In 10Khz unit
+  ULONG                           ulDriverTargetMemoryClock;  //In 10Khz unit
+  ULONG                           ulMaxEngineClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxMemoryClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxPixelClockPLL_Output;  //In 10Khz unit
+  ULONG                           ulASICMaxEngineClock;       //In 10Khz unit
+  ULONG                           ulASICMaxMemoryClock;       //In 10Khz unit
+  UCHAR                           ucASICMaxTemperature;
+  UCHAR                           ucMinAllowedBL_Level;
+  UCHAR                           ucPadding[2];               //Don't use them
+  ULONG                           aulReservedForBIOS[2];      //Don't use them
+  ULONG                           ulMinPixelClockPLL_Output;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Output; //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Output; //In 10Khz unit
+  USHORT                          usMaxPixelClock;            //In 10Khz unit, Max.  Pclk
+  USHORT                          usMinPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMaxPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMinPixelClockPLL_Output;  //In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output
+  ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+  USHORT                          usReferenceClock;           //In 10Khz unit	
+  USHORT                          usPM_RTS_Location;          //RTS PM4 starting location in ROM in 1Kb unit 
+  UCHAR                           ucPM_RTS_StreamSize;        //RTS PM4 packets in Kb unit
+  UCHAR                           ucDesign_ID;                //Indicate what is the board design
+  UCHAR                           ucMemoryModule_ID;          //Indicate what is the board design
+}ATOM_FIRMWARE_INFO_V1_2;
 
-typedef struct _ATOM_FIRMWARE_INFO_V1_3 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulFirmwareRevision;
-	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
-	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
-	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
-	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
-	UCHAR ucASICMaxTemperature;
-	UCHAR ucMinAllowedBL_Level;
-	UCHAR ucPadding[2];	/* Don't use them */
-	ULONG aulReservedForBIOS;	/* Don't use them */
-	ULONG ul3DAccelerationEngineClock;	/* In 10Khz unit */
-	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
-	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
-	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
-	USHORT usReferenceClock;	/* In 10Khz unit */
-	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
-	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
-	UCHAR ucDesign_ID;	/* Indicate what is the board design */
-	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
-} ATOM_FIRMWARE_INFO_V1_3;
+typedef struct _ATOM_FIRMWARE_INFO_V1_3
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG                           ulFirmwareRevision;
+  ULONG                           ulDefaultEngineClock;       //In 10Khz unit
+  ULONG                           ulDefaultMemoryClock;       //In 10Khz unit
+  ULONG                           ulDriverTargetEngineClock;  //In 10Khz unit
+  ULONG                           ulDriverTargetMemoryClock;  //In 10Khz unit
+  ULONG                           ulMaxEngineClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxMemoryClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxPixelClockPLL_Output;  //In 10Khz unit
+  ULONG                           ulASICMaxEngineClock;       //In 10Khz unit
+  ULONG                           ulASICMaxMemoryClock;       //In 10Khz unit
+  UCHAR                           ucASICMaxTemperature;
+  UCHAR                           ucMinAllowedBL_Level;
+  UCHAR                           ucPadding[2];               //Don't use them
+  ULONG                           aulReservedForBIOS;         //Don't use them
+  ULONG                           ul3DAccelerationEngineClock;//In 10Khz unit
+  ULONG                           ulMinPixelClockPLL_Output;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Output; //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Output; //In 10Khz unit
+  USHORT                          usMaxPixelClock;            //In 10Khz unit, Max.  Pclk
+  USHORT                          usMinPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMaxPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMinPixelClockPLL_Output;  //In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output
+  ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+  USHORT                          usReferenceClock;           //In 10Khz unit	
+  USHORT                          usPM_RTS_Location;          //RTS PM4 starting location in ROM in 1Kb unit 
+  UCHAR                           ucPM_RTS_StreamSize;        //RTS PM4 packets in Kb unit
+  UCHAR                           ucDesign_ID;                //Indicate what is the board design
+  UCHAR                           ucMemoryModule_ID;          //Indicate what is the board design
+}ATOM_FIRMWARE_INFO_V1_3;
 
-typedef struct _ATOM_FIRMWARE_INFO_V1_4 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulFirmwareRevision;
-	ULONG ulDefaultEngineClock;	/* In 10Khz unit */
-	ULONG ulDefaultMemoryClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetEngineClock;	/* In 10Khz unit */
-	ULONG ulDriverTargetMemoryClock;	/* In 10Khz unit */
-	ULONG ulMaxEngineClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxMemoryClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulMaxPixelClockPLL_Output;	/* In 10Khz unit */
-	ULONG ulASICMaxEngineClock;	/* In 10Khz unit */
-	ULONG ulASICMaxMemoryClock;	/* In 10Khz unit */
-	UCHAR ucASICMaxTemperature;
-	UCHAR ucMinAllowedBL_Level;
-	USHORT usBootUpVDDCVoltage;	/* In MV unit */
-	USHORT usLcdMinPixelClockPLL_Output;	/*  In MHz unit */
-	USHORT usLcdMaxPixelClockPLL_Output;	/*  In MHz unit */
-	ULONG ul3DAccelerationEngineClock;	/* In 10Khz unit */
-	ULONG ulMinPixelClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxEngineClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinEngineClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxMemoryClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinMemoryClockPLL_Output;	/* In 10Khz unit */
-	USHORT usMaxPixelClock;	/* In 10Khz unit, Max.  Pclk */
-	USHORT usMinPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMaxPixelClockPLL_Input;	/* In 10Khz unit */
-	USHORT usMinPixelClockPLL_Output;	/* In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output */
-	ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
-	USHORT usReferenceClock;	/* In 10Khz unit */
-	USHORT usPM_RTS_Location;	/* RTS PM4 starting location in ROM in 1Kb unit */
-	UCHAR ucPM_RTS_StreamSize;	/* RTS PM4 packets in Kb unit */
-	UCHAR ucDesign_ID;	/* Indicate what is the board design */
-	UCHAR ucMemoryModule_ID;	/* Indicate what is the board design */
-} ATOM_FIRMWARE_INFO_V1_4;
+typedef struct _ATOM_FIRMWARE_INFO_V1_4
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG                           ulFirmwareRevision;
+  ULONG                           ulDefaultEngineClock;       //In 10Khz unit
+  ULONG                           ulDefaultMemoryClock;       //In 10Khz unit
+  ULONG                           ulDriverTargetEngineClock;  //In 10Khz unit
+  ULONG                           ulDriverTargetMemoryClock;  //In 10Khz unit
+  ULONG                           ulMaxEngineClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxMemoryClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxPixelClockPLL_Output;  //In 10Khz unit
+  ULONG                           ulASICMaxEngineClock;       //In 10Khz unit
+  ULONG                           ulASICMaxMemoryClock;       //In 10Khz unit
+  UCHAR                           ucASICMaxTemperature;
+  UCHAR                           ucMinAllowedBL_Level;
+  USHORT                          usBootUpVDDCVoltage;        //In MV unit
+  USHORT                          usLcdMinPixelClockPLL_Output; // In MHz unit
+  USHORT                          usLcdMaxPixelClockPLL_Output; // In MHz unit
+  ULONG                           ul3DAccelerationEngineClock;//In 10Khz unit
+  ULONG                           ulMinPixelClockPLL_Output;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Output; //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Output; //In 10Khz unit
+  USHORT                          usMaxPixelClock;            //In 10Khz unit, Max.  Pclk
+  USHORT                          usMinPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMaxPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMinPixelClockPLL_Output;  //In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output
+  ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+  USHORT                          usReferenceClock;           //In 10Khz unit	
+  USHORT                          usPM_RTS_Location;          //RTS PM4 starting location in ROM in 1Kb unit 
+  UCHAR                           ucPM_RTS_StreamSize;        //RTS PM4 packets in Kb unit
+  UCHAR                           ucDesign_ID;                //Indicate what is the board design
+  UCHAR                           ucMemoryModule_ID;          //Indicate what is the board design
+}ATOM_FIRMWARE_INFO_V1_4;
 
-#define ATOM_FIRMWARE_INFO_LAST  ATOM_FIRMWARE_INFO_V1_4
+//the structure below to be used from Cypress
+typedef struct _ATOM_FIRMWARE_INFO_V2_1
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG                           ulFirmwareRevision;
+  ULONG                           ulDefaultEngineClock;       //In 10Khz unit
+  ULONG                           ulDefaultMemoryClock;       //In 10Khz unit
+  ULONG                           ulReserved1;
+  ULONG                           ulReserved2;
+  ULONG                           ulMaxEngineClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxMemoryClockPLL_Output; //In 10Khz unit
+  ULONG                           ulMaxPixelClockPLL_Output;  //In 10Khz unit
+  ULONG                           ulBinaryAlteredInfo;        //Was ulASICMaxEngineClock
+  ULONG                           ulDefaultDispEngineClkFreq; //In 10Khz unit
+  UCHAR                           ucReserved1;                //Was ucASICMaxTemperature;
+  UCHAR                           ucMinAllowedBL_Level;
+  USHORT                          usBootUpVDDCVoltage;        //In MV unit
+  USHORT                          usLcdMinPixelClockPLL_Output; // In MHz unit
+  USHORT                          usLcdMaxPixelClockPLL_Output; // In MHz unit
+  ULONG                           ulReserved4;                //Was ulAsicMaximumVoltage
+  ULONG                           ulMinPixelClockPLL_Output;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxEngineClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinEngineClockPLL_Output; //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMaxMemoryClockPLL_Input;  //In 10Khz unit
+  USHORT                          usMinMemoryClockPLL_Output; //In 10Khz unit
+  USHORT                          usMaxPixelClock;            //In 10Khz unit, Max.  Pclk
+  USHORT                          usMinPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMaxPixelClockPLL_Input;   //In 10Khz unit
+  USHORT                          usMinPixelClockPLL_Output;  //In 10Khz unit - lower 16bit of ulMinPixelClockPLL_Output
+  ATOM_FIRMWARE_CAPABILITY_ACCESS usFirmwareCapability;
+  USHORT                          usCoreReferenceClock;       //In 10Khz unit	
+  USHORT                          usMemoryReferenceClock;     //In 10Khz unit	
+  USHORT                          usUniphyDPModeExtClkFreq;   //In 10Khz unit, if it is 0, In DP Mode Uniphy Input clock from internal PPLL, otherwise Input clock from external Spread clock
+  UCHAR                           ucMemoryModule_ID;          //Indicate what is the board design
+  UCHAR                           ucReserved4[3];
+}ATOM_FIRMWARE_INFO_V2_1;
 
-/****************************************************************************/
-/*  Structures used in IntegratedSystemInfoTable */
-/****************************************************************************/
+
+#define ATOM_FIRMWARE_INFO_LAST  ATOM_FIRMWARE_INFO_V2_1
+
+/****************************************************************************/	
+// Structures used in IntegratedSystemInfoTable
+/****************************************************************************/	
 #define IGP_CAP_FLAG_DYNAMIC_CLOCK_EN      0x2
 #define IGP_CAP_FLAG_AC_CARD               0x4
 #define IGP_CAP_FLAG_SDVO_CARD             0x8
 #define IGP_CAP_FLAG_POSTDIV_BY_2_MODE     0x10
 
-typedef struct _ATOM_INTEGRATED_SYSTEM_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulBootUpEngineClock;	/* in 10kHz unit */
-	ULONG ulBootUpMemoryClock;	/* in 10kHz unit */
-	ULONG ulMaxSystemMemoryClock;	/* in 10kHz unit */
-	ULONG ulMinSystemMemoryClock;	/* in 10kHz unit */
-	UCHAR ucNumberOfCyclesInPeriodHi;
-	UCHAR ucLCDTimingSel;	/* =0:not valid.!=0 sel this timing descriptor from LCD EDID. */
-	USHORT usReserved1;
-	USHORT usInterNBVoltageLow;	/* An intermidiate PMW value to set the voltage */
-	USHORT usInterNBVoltageHigh;	/* Another intermidiate PMW value to set the voltage */
-	ULONG ulReserved[2];
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO
+{
+  ATOM_COMMON_TABLE_HEADER        sHeader; 
+  ULONG	                          ulBootUpEngineClock;		    //in 10kHz unit
+  ULONG	                          ulBootUpMemoryClock;		    //in 10kHz unit
+  ULONG	                          ulMaxSystemMemoryClock;	    //in 10kHz unit
+  ULONG	                          ulMinSystemMemoryClock;	    //in 10kHz unit
+  UCHAR                           ucNumberOfCyclesInPeriodHi;
+  UCHAR                           ucLCDTimingSel;             //=0:not valid.!=0 sel this timing descriptor from LCD EDID.
+  USHORT                          usReserved1;
+  USHORT                          usInterNBVoltageLow;        //An intermidiate PMW value to set the voltage 
+  USHORT                          usInterNBVoltageHigh;       //Another intermidiate PMW value to set the voltage 
+  ULONG	                          ulReserved[2];
 
-	USHORT usFSBClock;	/* In MHz unit */
-	USHORT usCapabilityFlag;	/* Bit0=1 indicates the fake HDMI support,Bit1=0/1 for Dynamic clocking dis/enable */
-	/* Bit[3:2]== 0:No PCIE card, 1:AC card, 2:SDVO card */
-	/* Bit[4]==1: P/2 mode, ==0: P/1 mode */
-	USHORT usPCIENBCfgReg7;	/* bit[7:0]=MUX_Sel, bit[9:8]=MUX_SEL_LEVEL2, bit[10]=Lane_Reversal */
-	USHORT usK8MemoryClock;	/* in MHz unit */
-	USHORT usK8SyncStartDelay;	/* in 0.01 us unit */
-	USHORT usK8DataReturnTime;	/* in 0.01 us unit */
-	UCHAR ucMaxNBVoltage;
-	UCHAR ucMinNBVoltage;
-	UCHAR ucMemoryType;	/* [7:4]=1:DDR1;=2:DDR2;=3:DDR3.[3:0] is reserved */
-	UCHAR ucNumberOfCyclesInPeriod;	/* CG.FVTHROT_PWM_CTRL_REG0.NumberOfCyclesInPeriod */
-	UCHAR ucStartingPWM_HighTime;	/* CG.FVTHROT_PWM_CTRL_REG0.StartingPWM_HighTime */
-	UCHAR ucHTLinkWidth;	/* 16 bit vs. 8 bit */
-	UCHAR ucMaxNBVoltageHigh;
-	UCHAR ucMinNBVoltageHigh;
-} ATOM_INTEGRATED_SYSTEM_INFO;
+  USHORT	                        usFSBClock;			            //In MHz unit
+  USHORT                          usCapabilityFlag;		        //Bit0=1 indicates the fake HDMI support,Bit1=0/1 for Dynamic clocking dis/enable
+																                              //Bit[3:2]== 0:No PCIE card, 1:AC card, 2:SDVO card
+                                                              //Bit[4]==1: P/2 mode, ==0: P/1 mode
+  USHORT	                        usPCIENBCfgReg7;				    //bit[7:0]=MUX_Sel, bit[9:8]=MUX_SEL_LEVEL2, bit[10]=Lane_Reversal
+  USHORT	                        usK8MemoryClock;            //in MHz unit
+  USHORT	                        usK8SyncStartDelay;         //in 0.01 us unit
+  USHORT	                        usK8DataReturnTime;         //in 0.01 us unit
+  UCHAR                           ucMaxNBVoltage;
+  UCHAR                           ucMinNBVoltage;
+  UCHAR                           ucMemoryType;					      //[7:4]=1:DDR1;=2:DDR2;=3:DDR3.[3:0] is reserved
+  UCHAR                           ucNumberOfCyclesInPeriod;		//CG.FVTHROT_PWM_CTRL_REG0.NumberOfCyclesInPeriod 
+  UCHAR                           ucStartingPWM_HighTime;     //CG.FVTHROT_PWM_CTRL_REG0.StartingPWM_HighTime
+  UCHAR                           ucHTLinkWidth;              //16 bit vs. 8 bit
+  UCHAR                           ucMaxNBVoltageHigh;    
+  UCHAR                           ucMinNBVoltageHigh;
+}ATOM_INTEGRATED_SYSTEM_INFO;
 
 /* Explanation on entries in ATOM_INTEGRATED_SYSTEM_INFO
-ulBootUpMemoryClock:    For Intel IGP,it's the UMA system memory clock
+ulBootUpMemoryClock:    For Intel IGP,it's the UMA system memory clock 
                         For AMD IGP,it's 0 if no SidePort memory installed or it's the boot-up SidePort memory clock
 ulMaxSystemMemoryClock: For Intel IGP,it's the Max freq from memory SPD if memory runs in ASYNC mode or otherwise (SYNC mode) it's 0
                         For AMD IGP,for now this can be 0
-ulMinSystemMemoryClock: For Intel IGP,it's 133MHz if memory runs in ASYNC mode or otherwise (SYNC mode) it's 0
+ulMinSystemMemoryClock: For Intel IGP,it's 133MHz if memory runs in ASYNC mode or otherwise (SYNC mode) it's 0 
                         For AMD IGP,for now this can be 0
 
-usFSBClock:             For Intel IGP,it's FSB Freq
+usFSBClock:             For Intel IGP,it's FSB Freq 
                         For AMD IGP,it's HT Link Speed
 
 usK8MemoryClock:        For AMD IGP only. For RevF CPU, set it to 200
@@ -1687,98 +2093,113 @@
 ucMaxNBVoltage:         Voltage regulator dependent PWM value. Low 8 bits of the value for the max voltage.Set this one to 0xFF if VC without PWM. Set this to 0x0 if no VC at all.
 ucMinNBVoltage:         Voltage regulator dependent PWM value. Low 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all.
 
-ucNumberOfCyclesInPeriod:   Indicate how many cycles when PWM duty is 100%. low 8 bits of the value.
-ucNumberOfCyclesInPeriodHi: Indicate how many cycles when PWM duty is 100%. high 8 bits of the value.If the PWM has an inverter,set bit [7]==1,otherwise set it 0
+ucNumberOfCyclesInPeriod:   Indicate how many cycles when PWM duty is 100%. low 8 bits of the value. 
+ucNumberOfCyclesInPeriodHi: Indicate how many cycles when PWM duty is 100%. high 8 bits of the value.If the PWM has an inverter,set bit [7]==1,otherwise set it 0 
 
 ucMaxNBVoltageHigh:     Voltage regulator dependent PWM value. High 8 bits of  the value for the max voltage.Set this one to 0xFF if VC without PWM. Set this to 0x0 if no VC at all.
 ucMinNBVoltageHigh:     Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all.
 
+
 usInterNBVoltageLow:    Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all.
 usInterNBVoltageHigh:   Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all.
 */
 
+
 /*
 The following IGP table is introduced from RS780, which is supposed to be put by SBIOS in FB before IGP VBIOS starts VPOST;
-Then VBIOS will copy the whole structure to its image so all GPU SW components can access this data structure to get whatever they need.
+Then VBIOS will copy the whole structure to its image so all GPU SW components can access this data structure to get whatever they need. 
 The enough reservation should allow us to never change table revisions. Whenever needed, a GPU SW component can use reserved portion for new data entries.
 
 SW components can access the IGP system infor structure in the same way as before
 */
 
-typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ULONG ulBootUpEngineClock;	/* in 10kHz unit */
-	ULONG ulReserved1[2];	/* must be 0x0 for the reserved */
-	ULONG ulBootUpUMAClock;	/* in 10kHz unit */
-	ULONG ulBootUpSidePortClock;	/* in 10kHz unit */
-	ULONG ulMinSidePortClock;	/* in 10kHz unit */
-	ULONG ulReserved2[6];	/* must be 0x0 for the reserved */
-	ULONG ulSystemConfig;	/* see explanation below */
-	ULONG ulBootUpReqDisplayVector;
-	ULONG ulOtherDisplayMisc;
-	ULONG ulDDISlot1Config;
-	ULONG ulDDISlot2Config;
-	UCHAR ucMemoryType;	/* [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved */
-	UCHAR ucUMAChannelNumber;
-	UCHAR ucDockingPinBit;
-	UCHAR ucDockingPinPolarity;
-	ULONG ulDockingPinCFGInfo;
-	ULONG ulCPUCapInfo;
-	USHORT usNumberOfCyclesInPeriod;
-	USHORT usMaxNBVoltage;
-	USHORT usMinNBVoltage;
-	USHORT usBootUpNBVoltage;
-	ULONG ulHTLinkFreq;	/* in 10Khz */
-	USHORT usMinHTLinkWidth;
-	USHORT usMaxHTLinkWidth;
-	USHORT usUMASyncStartDelay;
-	USHORT usUMADataReturnTime;
-	USHORT usLinkStatusZeroTime;
-	USHORT usReserved;
-	ULONG ulHighVoltageHTLinkFreq;	/*  in 10Khz */
-	ULONG ulLowVoltageHTLinkFreq;	/*  in 10Khz */
-	USHORT usMaxUpStreamHTLinkWidth;
-	USHORT usMaxDownStreamHTLinkWidth;
-	USHORT usMinUpStreamHTLinkWidth;
-	USHORT usMinDownStreamHTLinkWidth;
-	ULONG ulReserved3[97];	/* must be 0x0 */
-} ATOM_INTEGRATED_SYSTEM_INFO_V2;
+
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V2
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  ULONG	                     ulBootUpEngineClock;       //in 10kHz unit
+  ULONG			     ulReserved1[2];            //must be 0x0 for the reserved
+  ULONG	                     ulBootUpUMAClock;          //in 10kHz unit
+  ULONG	                     ulBootUpSidePortClock;     //in 10kHz unit
+  ULONG	                     ulMinSidePortClock;        //in 10kHz unit
+  ULONG			     ulReserved2[6];            //must be 0x0 for the reserved
+  ULONG                      ulSystemConfig;            //see explanation below
+  ULONG                      ulBootUpReqDisplayVector;
+  ULONG                      ulOtherDisplayMisc;
+  ULONG                      ulDDISlot1Config;
+  ULONG                      ulDDISlot2Config;
+  UCHAR                      ucMemoryType;              //[3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved
+  UCHAR                      ucUMAChannelNumber;
+  UCHAR                      ucDockingPinBit;
+  UCHAR                      ucDockingPinPolarity;
+  ULONG                      ulDockingPinCFGInfo;
+  ULONG                      ulCPUCapInfo;
+  USHORT                     usNumberOfCyclesInPeriod;
+  USHORT                     usMaxNBVoltage;
+  USHORT                     usMinNBVoltage;
+  USHORT                     usBootUpNBVoltage;
+  ULONG                      ulHTLinkFreq;              //in 10Khz
+  USHORT                     usMinHTLinkWidth;
+  USHORT                     usMaxHTLinkWidth;
+  USHORT                     usUMASyncStartDelay;
+  USHORT                     usUMADataReturnTime;
+  USHORT                     usLinkStatusZeroTime;
+  USHORT                     usDACEfuse;				//for storing badgap value (for RS880 only)
+  ULONG                      ulHighVoltageHTLinkFreq;     // in 10Khz
+  ULONG                      ulLowVoltageHTLinkFreq;      // in 10Khz
+  USHORT                     usMaxUpStreamHTLinkWidth;
+  USHORT                     usMaxDownStreamHTLinkWidth;
+  USHORT                     usMinUpStreamHTLinkWidth;
+  USHORT                     usMinDownStreamHTLinkWidth;
+  USHORT                     usFirmwareVersion;         //0 means FW is not supported. Otherwise it's the FW version loaded by SBIOS and driver should enable FW.
+  USHORT                     usFullT0Time;             // Input to calculate minimum HT link change time required by NB P-State. Unit is 0.01us.
+  ULONG                      ulReserved3[96];          //must be 0x0
+}ATOM_INTEGRATED_SYSTEM_INFO_V2;   
 
 /*
 ulBootUpEngineClock:   Boot-up Engine Clock in 10Khz;
 ulBootUpUMAClock:      Boot-up UMA Clock in 10Khz; it must be 0x0 when UMA is not present
 ulBootUpSidePortClock: Boot-up SidePort Clock in 10Khz; it must be 0x0 when SidePort Memory is not present,this could be equal to or less than maximum supported Sideport memory clock
 
-ulSystemConfig:
-Bit[0]=1: PowerExpress mode =0 Non-PowerExpress mode;
+ulSystemConfig:  
+Bit[0]=1: PowerExpress mode =0 Non-PowerExpress mode; 
 Bit[1]=1: system boots up at AMD overdrived state or user customized  mode. In this case, driver will just stick to this boot-up mode. No other PowerPlay state
       =0: system boots up at driver control state. Power state depends on PowerPlay table.
 Bit[2]=1: PWM method is used on NB voltage control. =0: GPIO method is used.
 Bit[3]=1: Only one power state(Performance) will be supported.
       =0: Multiple power states supported from PowerPlay table.
-Bit[4]=1: CLMC is supported and enabled on current system.
-      =0: CLMC is not supported or enabled on current system. SBIOS need to support HT link/freq change through ATIF interface.
-Bit[5]=1: Enable CDLW for all driver control power states. Max HT width is from SBIOS, while Min HT width is determined by display requirement.
+Bit[4]=1: CLMC is supported and enabled on current system. 
+      =0: CLMC is not supported or enabled on current system. SBIOS need to support HT link/freq change through ATIF interface.  
+Bit[5]=1: Enable CDLW for all driver control power states. Max HT width is from SBIOS, while Min HT width is determined by display requirement.  
       =0: CDLW is disabled. If CLMC is enabled case, Min HT width will be set equal to Max HT width. If CLMC disabled case, Max HT width will be applied.
 Bit[6]=1: High Voltage requested for all power states. In this case, voltage will be forced at 1.1v and powerplay table voltage drop/throttling request will be ignored.
       =0: Voltage settings is determined by powerplay table.
 Bit[7]=1: Enable CLMC as hybrid Mode. CDLD and CILR will be disabled in this case and we're using legacy C1E. This is workaround for CPU(Griffin) performance issue.
       =0: Enable CLMC as regular mode, CDLD and CILR will be enabled.
+Bit[8]=1: CDLF is supported and enabled on current system.
+      =0: CDLF is not supported or enabled on current system.
+Bit[9]=1: DLL Shut Down feature is enabled on current system.
+      =0: DLL Shut Down feature is not enabled or supported on current system.
 
 ulBootUpReqDisplayVector: This dword is a bit vector indicates what display devices are requested during boot-up. Refer to ATOM_DEVICE_xxx_SUPPORT for the bit vector definitions.
 
 ulOtherDisplayMisc: [15:8]- Bootup LCD Expansion selection; 0-center, 1-full panel size expansion;
-			              [7:0] - BootupTV standard selection; This is a bit vector to indicate what TV standards are supported by the system. Refer to ucTVSuppportedStd definition;
+			              [7:0] - BootupTV standard selection; This is a bit vector to indicate what TV standards are supported by the system. Refer to ucTVSupportedStd definition;
 
 ulDDISlot1Config: Describes the PCIE lane configuration on this DDI PCIE slot (ADD2 card) or connector (Mobile design).
       [3:0]  - Bit vector to indicate PCIE lane config of the DDI slot/connector on chassis (bit 0=1 lane 3:0; bit 1=1 lane 7:4; bit 2=1 lane 11:8; bit 3=1 lane 15:12)
-			[7:4]  - Bit vector to indicate PCIE lane config of the same DDI slot/connector on docking station (bit 0=1 lane 3:0; bit 1=1 lane 7:4; bit 2=1 lane 11:8; bit 3=1 lane 15:12)
-			[15:8] - Lane configuration attribute;
+			[7:4]  - Bit vector to indicate PCIE lane config of the same DDI slot/connector on docking station (bit 4=1 lane 3:0; bit 5=1 lane 7:4; bit 6=1 lane 11:8; bit 7=1 lane 15:12)
+      When a DDI connector is not "paired" (meaming two connections mutualexclusive on chassis or docking, only one of them can be connected at one time.
+      in both chassis and docking, SBIOS has to duplicate the same PCIE lane info from chassis to docking or vice versa. For example:
+      one DDI connector is only populated in docking with PCIE lane 8-11, but there is no paired connection on chassis, SBIOS has to copy bit 6 to bit 2.
+
+			[15:8] - Lane configuration attribute; 
       [23:16]- Connector type, possible value:
                CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D
                CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_D
                CONNECTOR_OBJECT_ID_HDMI_TYPE_A
                CONNECTOR_OBJECT_ID_DISPLAYPORT
+               CONNECTOR_OBJECT_ID_eDP
 			[31:24]- Reserved
 
 ulDDISlot2Config: Same as Slot1.
@@ -1787,29 +2208,31 @@
 
 ucUMAChannelNumber:  how many channels for the UMA;
 
-ulDockingPinCFGInfo: [15:0]-Bus/Device/Function # to CFG to read this Docking Pin; [31:16]-reg offset in CFG to read this pin
+ulDockingPinCFGInfo: [15:0]-Bus/Device/Function # to CFG to read this Docking Pin; [31:16]-reg offset in CFG to read this pin 
 ucDockingPinBit:     which bit in this register to read the pin status;
 ucDockingPinPolarity:Polarity of the pin when docked;
 
 ulCPUCapInfo:        [7:0]=1:Griffin;[7:0]=2:Greyhound;[7:0]=3:K8, other bits reserved for now and must be 0x0
 
 usNumberOfCyclesInPeriod:Indicate how many cycles when PWM duty is 100%.
-usMaxNBVoltage:Max. voltage control value in either PWM or GPIO mode.
+
+usMaxNBVoltage:Max. voltage control value in either PWM or GPIO mode. 
 usMinNBVoltage:Min. voltage control value in either PWM or GPIO mode.
                     GPIO mode: both usMaxNBVoltage & usMinNBVoltage have a valid value ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE=0
                     PWM mode: both usMaxNBVoltage & usMinNBVoltage have a valid value ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE=1
                     GPU SW don't control mode: usMaxNBVoltage & usMinNBVoltage=0 and no care about ulSystemConfig.SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE
+
 usBootUpNBVoltage:Boot-up voltage regulator dependent PWM value.
 
 ulHTLinkFreq:       Bootup HT link Frequency in 10Khz.
-usMinHTLinkWidth:   Bootup minimum HT link width. If CDLW disabled, this is equal to usMaxHTLinkWidth.
+usMinHTLinkWidth:   Bootup minimum HT link width. If CDLW disabled, this is equal to usMaxHTLinkWidth. 
                     If CDLW enabled, both upstream and downstream width should be the same during bootup.
-usMaxHTLinkWidth:   Bootup maximum HT link width. If CDLW disabled, this is equal to usMinHTLinkWidth.
-                    If CDLW enabled, both upstream and downstream width should be the same during bootup.
+usMaxHTLinkWidth:   Bootup maximum HT link width. If CDLW disabled, this is equal to usMinHTLinkWidth. 
+                    If CDLW enabled, both upstream and downstream width should be the same during bootup.  
 
-usUMASyncStartDelay: Memory access latency, required for watermark calculation
+usUMASyncStartDelay: Memory access latency, required for watermark calculation 
 usUMADataReturnTime: Memory access latency, required for watermark calculation
-usLinkStatusZeroTime:Memory access latency required for watermark calculation, set this to 0x0 for K8 CPU, set a proper value in 0.01 the unit of us
+usLinkStatusZeroTime:Memory access latency required for watermark calculation, set this to 0x0 for K8 CPU, set a proper value in 0.01 the unit of us 
 for Griffin or Greyhound. SBIOS needs to convert to actual time by:
                      if T0Ttime [5:4]=00b, then usLinkStatusZeroTime=T0Ttime [3:0]*0.1us (0.0 to 1.5us)
                      if T0Ttime [5:4]=01b, then usLinkStatusZeroTime=T0Ttime [3:0]*0.5us (0.0 to 7.5us)
@@ -1817,7 +2240,7 @@
                      if T0Ttime [5:4]=11b, and T0Ttime [3:0]=0x0 to 0xa, then usLinkStatusZeroTime=T0Ttime [3:0]*20us (0.0 to 200us)
 
 ulHighVoltageHTLinkFreq:     HT link frequency for power state with low voltage. If boot up runs in HT1, this must be 0.
-                             This must be less than or equal to ulHTLinkFreq(bootup frequency).
+                             This must be less than or equal to ulHTLinkFreq(bootup frequency). 
 ulLowVoltageHTLinkFreq:      HT link frequency for power state with low voltage or voltage scaling 1.0v~1.1v. If boot up runs in HT1, this must be 0.
                              This must be less than or equal to ulHighVoltageHTLinkFreq.
 
@@ -1827,14 +2250,17 @@
 usMinDownStreamHTLinkWidth:  same as above.
 */
 
+
 #define SYSTEM_CONFIG_POWEREXPRESS_ENABLE                 0x00000001
 #define SYSTEM_CONFIG_RUN_AT_OVERDRIVE_ENGINE             0x00000002
-#define SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE                  0x00000004
+#define SYSTEM_CONFIG_USE_PWM_ON_VOLTAGE                  0x00000004 
 #define SYSTEM_CONFIG_PERFORMANCE_POWERSTATE_ONLY         0x00000008
 #define SYSTEM_CONFIG_CLMC_ENABLED                        0x00000010
 #define SYSTEM_CONFIG_CDLW_ENABLED                        0x00000020
 #define SYSTEM_CONFIG_HIGH_VOLTAGE_REQUESTED              0x00000040
 #define SYSTEM_CONFIG_CLMC_HYBRID_MODE_ENABLED            0x00000080
+#define SYSTEM_CONFIG_CDLF_ENABLED                        0x00000100
+#define SYSTEM_CONFIG_DLL_SHUTDOWN_ENABLED                0x00000200
 
 #define IGP_DDI_SLOT_LANE_CONFIG_MASK                     0x000000FF
 
@@ -1851,6 +2277,41 @@
 
 #define IGP_DDI_SLOT_CONNECTOR_TYPE_MASK                  0x00FF0000
 
+// IntegratedSystemInfoTable new Rev is V5 after V2, because of the real rev of V2 is v1.4. This rev is used for RR
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V5
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  ULONG	                     ulBootUpEngineClock;       //in 10kHz unit
+  ULONG                      ulDentistVCOFreq;          //Dentist VCO clock in 10kHz unit, the source of GPU SCLK, LCLK, UCLK and VCLK. 
+  ULONG                      ulLClockFreq;              //GPU Lclk freq in 10kHz unit, have relationship with NCLK in NorthBridge
+  ULONG	                     ulBootUpUMAClock;          //in 10kHz unit
+  ULONG                      ulReserved1[8];            //must be 0x0 for the reserved
+  ULONG                      ulBootUpReqDisplayVector;
+  ULONG                      ulOtherDisplayMisc;
+  ULONG                      ulReserved2[4];            //must be 0x0 for the reserved
+  ULONG                      ulSystemConfig;            //TBD
+  ULONG                      ulCPUCapInfo;              //TBD
+  USHORT                     usMaxNBVoltage;            //high NB voltage, calculated using current VDDNB (D24F2xDC) and VDDNB offset fuse;
+  USHORT                     usMinNBVoltage;            //low NB voltage, calculated using current VDDNB (D24F2xDC) and VDDNB offset fuse;
+  USHORT                     usBootUpNBVoltage;         //boot up NB voltage
+  UCHAR                      ucHtcTmpLmt;               //bit [22:16] of D24F3x64 Hardware Thermal Control (HTC) Register, may not be needed, TBD
+  UCHAR                      ucTjOffset;                //bit [28:22] of D24F3xE4 Thermtrip Status Register,may not be needed, TBD
+  ULONG                      ulReserved3[4];            //must be 0x0 for the reserved
+  ULONG                      ulDDISlot1Config;          //see above ulDDISlot1Config definition
+  ULONG                      ulDDISlot2Config;
+  ULONG                      ulDDISlot3Config;
+  ULONG                      ulDDISlot4Config;
+  ULONG                      ulReserved4[4];            //must be 0x0 for the reserved
+  UCHAR                      ucMemoryType;              //[3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved
+  UCHAR                      ucUMAChannelNumber;
+  USHORT                     usReserved;
+  ULONG                      ulReserved5[4];            //must be 0x0 for the reserved
+  ULONG                      ulCSR_M3_ARB_CNTL_DEFAULT[10];//arrays with values for CSR M3 arbiter for default
+  ULONG                      ulCSR_M3_ARB_CNTL_UVD[10]; //arrays with values for CSR M3 arbiter for UVD playback
+  ULONG                      ulCSR_M3_ARB_CNTL_FS3D[10];//arrays with values for CSR M3 arbiter for Full Screen 3D applications
+  ULONG                      ulReserved6[61];           //must be 0x0
+}ATOM_INTEGRATED_SYSTEM_INFO_V5;   
+
 #define ATOM_CRT_INT_ENCODER1_INDEX                       0x00000000
 #define ATOM_LCD_INT_ENCODER1_INDEX                       0x00000001
 #define ATOM_TV_INT_ENCODER1_INDEX                        0x00000002
@@ -1866,8 +2327,8 @@
 #define ATOM_DFP_INT_ENCODER3_INDEX                       0x0000000C
 #define ATOM_DFP_INT_ENCODER4_INDEX                       0x0000000D
 
-/*  define ASIC internal encoder id ( bit vector ) */
-#define ASIC_INT_DAC1_ENCODER_ID											0x00
+// define ASIC internal encoder id ( bit vector ), used for CRTC_SourceSelTable
+#define ASIC_INT_DAC1_ENCODER_ID    											0x00 
 #define ASIC_INT_TV_ENCODER_ID														0x02
 #define ASIC_INT_DIG1_ENCODER_ID													0x03
 #define ASIC_INT_DAC2_ENCODER_ID													0x04
@@ -1875,10 +2336,24 @@
 #define ASIC_INT_DVO_ENCODER_ID														0x07
 #define ASIC_INT_DIG2_ENCODER_ID													0x09
 #define ASIC_EXT_DIG_ENCODER_ID														0x05
+#define ASIC_EXT_DIG2_ENCODER_ID													0x08
+#define ASIC_INT_DIG3_ENCODER_ID													0x0a
+#define ASIC_INT_DIG4_ENCODER_ID													0x0b
+#define ASIC_INT_DIG5_ENCODER_ID													0x0c
+#define ASIC_INT_DIG6_ENCODER_ID													0x0d
 
-/* define Encoder attribute */
+//define Encoder attribute
 #define ATOM_ANALOG_ENCODER																0
-#define ATOM_DIGITAL_ENCODER															1
+#define ATOM_DIGITAL_ENCODER															1		
+#define ATOM_DP_ENCODER															      2		
+
+#define ATOM_ENCODER_ENUM_MASK                            0x70
+#define ATOM_ENCODER_ENUM_ID1                             0x00
+#define ATOM_ENCODER_ENUM_ID2                             0x10
+#define ATOM_ENCODER_ENUM_ID3                             0x20
+#define ATOM_ENCODER_ENUM_ID4                             0x30
+#define ATOM_ENCODER_ENUM_ID5                             0x40 
+#define ATOM_ENCODER_ENUM_ID6                             0x50
 
 #define ATOM_DEVICE_CRT1_INDEX                            0x00000000
 #define ATOM_DEVICE_LCD1_INDEX                            0x00000001
@@ -1886,45 +2361,40 @@
 #define ATOM_DEVICE_DFP1_INDEX                            0x00000003
 #define ATOM_DEVICE_CRT2_INDEX                            0x00000004
 #define ATOM_DEVICE_LCD2_INDEX                            0x00000005
-#define ATOM_DEVICE_TV2_INDEX                             0x00000006
+#define ATOM_DEVICE_DFP6_INDEX                            0x00000006
 #define ATOM_DEVICE_DFP2_INDEX                            0x00000007
 #define ATOM_DEVICE_CV_INDEX                              0x00000008
-#define ATOM_DEVICE_DFP3_INDEX														0x00000009
-#define ATOM_DEVICE_DFP4_INDEX														0x0000000A
-#define ATOM_DEVICE_DFP5_INDEX														0x0000000B
+#define ATOM_DEVICE_DFP3_INDEX                            0x00000009
+#define ATOM_DEVICE_DFP4_INDEX                            0x0000000A
+#define ATOM_DEVICE_DFP5_INDEX                            0x0000000B
+
 #define ATOM_DEVICE_RESERVEDC_INDEX                       0x0000000C
 #define ATOM_DEVICE_RESERVEDD_INDEX                       0x0000000D
 #define ATOM_DEVICE_RESERVEDE_INDEX                       0x0000000E
 #define ATOM_DEVICE_RESERVEDF_INDEX                       0x0000000F
 #define ATOM_MAX_SUPPORTED_DEVICE_INFO                    (ATOM_DEVICE_DFP3_INDEX+1)
 #define ATOM_MAX_SUPPORTED_DEVICE_INFO_2                  ATOM_MAX_SUPPORTED_DEVICE_INFO
-#define ATOM_MAX_SUPPORTED_DEVICE_INFO_3                  (ATOM_DEVICE_DFP5_INDEX + 1)
+#define ATOM_MAX_SUPPORTED_DEVICE_INFO_3                  (ATOM_DEVICE_DFP5_INDEX + 1 )
 
 #define ATOM_MAX_SUPPORTED_DEVICE                         (ATOM_DEVICE_RESERVEDF_INDEX+1)
 
-#define ATOM_DEVICE_CRT1_SUPPORT                          (0x1L << ATOM_DEVICE_CRT1_INDEX)
-#define ATOM_DEVICE_LCD1_SUPPORT                          (0x1L << ATOM_DEVICE_LCD1_INDEX)
-#define ATOM_DEVICE_TV1_SUPPORT                           (0x1L << ATOM_DEVICE_TV1_INDEX)
-#define ATOM_DEVICE_DFP1_SUPPORT                          (0x1L << ATOM_DEVICE_DFP1_INDEX)
-#define ATOM_DEVICE_CRT2_SUPPORT                          (0x1L << ATOM_DEVICE_CRT2_INDEX)
-#define ATOM_DEVICE_LCD2_SUPPORT                          (0x1L << ATOM_DEVICE_LCD2_INDEX)
-#define ATOM_DEVICE_TV2_SUPPORT                           (0x1L << ATOM_DEVICE_TV2_INDEX)
-#define ATOM_DEVICE_DFP2_SUPPORT                          (0x1L << ATOM_DEVICE_DFP2_INDEX)
-#define ATOM_DEVICE_CV_SUPPORT                            (0x1L << ATOM_DEVICE_CV_INDEX)
-#define ATOM_DEVICE_DFP3_SUPPORT													(0x1L << ATOM_DEVICE_DFP3_INDEX)
-#define ATOM_DEVICE_DFP4_SUPPORT													(0x1L << ATOM_DEVICE_DFP4_INDEX )
-#define ATOM_DEVICE_DFP5_SUPPORT													(0x1L << ATOM_DEVICE_DFP5_INDEX)
+#define ATOM_DEVICE_CRT1_SUPPORT                          (0x1L << ATOM_DEVICE_CRT1_INDEX )
+#define ATOM_DEVICE_LCD1_SUPPORT                          (0x1L << ATOM_DEVICE_LCD1_INDEX )
+#define ATOM_DEVICE_TV1_SUPPORT                           (0x1L << ATOM_DEVICE_TV1_INDEX  )
+#define ATOM_DEVICE_DFP1_SUPPORT                          (0x1L << ATOM_DEVICE_DFP1_INDEX )
+#define ATOM_DEVICE_CRT2_SUPPORT                          (0x1L << ATOM_DEVICE_CRT2_INDEX )
+#define ATOM_DEVICE_LCD2_SUPPORT                          (0x1L << ATOM_DEVICE_LCD2_INDEX )
+#define ATOM_DEVICE_DFP6_SUPPORT                          (0x1L << ATOM_DEVICE_DFP6_INDEX )
+#define ATOM_DEVICE_DFP2_SUPPORT                          (0x1L << ATOM_DEVICE_DFP2_INDEX )
+#define ATOM_DEVICE_CV_SUPPORT                            (0x1L << ATOM_DEVICE_CV_INDEX   )
+#define ATOM_DEVICE_DFP3_SUPPORT                          (0x1L << ATOM_DEVICE_DFP3_INDEX )
+#define ATOM_DEVICE_DFP4_SUPPORT                          (0x1L << ATOM_DEVICE_DFP4_INDEX )
+#define ATOM_DEVICE_DFP5_SUPPORT                          (0x1L << ATOM_DEVICE_DFP5_INDEX )
 
-#define ATOM_DEVICE_CRT_SUPPORT \
-	(ATOM_DEVICE_CRT1_SUPPORT | ATOM_DEVICE_CRT2_SUPPORT)
-#define ATOM_DEVICE_DFP_SUPPORT \
-	(ATOM_DEVICE_DFP1_SUPPORT | ATOM_DEVICE_DFP2_SUPPORT | \
-	 ATOM_DEVICE_DFP3_SUPPORT | ATOM_DEVICE_DFP4_SUPPORT | \
-	 ATOM_DEVICE_DFP5_SUPPORT)
-#define ATOM_DEVICE_TV_SUPPORT \
-	(ATOM_DEVICE_TV1_SUPPORT  | ATOM_DEVICE_TV2_SUPPORT)
-#define ATOM_DEVICE_LCD_SUPPORT \
-	(ATOM_DEVICE_LCD1_SUPPORT | ATOM_DEVICE_LCD2_SUPPORT)
+#define ATOM_DEVICE_CRT_SUPPORT                           (ATOM_DEVICE_CRT1_SUPPORT | ATOM_DEVICE_CRT2_SUPPORT)
+#define ATOM_DEVICE_DFP_SUPPORT                           (ATOM_DEVICE_DFP1_SUPPORT | ATOM_DEVICE_DFP2_SUPPORT |  ATOM_DEVICE_DFP3_SUPPORT | ATOM_DEVICE_DFP4_SUPPORT | ATOM_DEVICE_DFP5_SUPPORT | ATOM_DEVICE_DFP6_SUPPORT)
+#define ATOM_DEVICE_TV_SUPPORT                            (ATOM_DEVICE_TV1_SUPPORT)
+#define ATOM_DEVICE_LCD_SUPPORT                           (ATOM_DEVICE_LCD1_SUPPORT | ATOM_DEVICE_LCD2_SUPPORT)
 
 #define ATOM_DEVICE_CONNECTOR_TYPE_MASK                   0x000000F0
 #define ATOM_DEVICE_CONNECTOR_TYPE_SHIFT                  0x00000004
@@ -1942,6 +2412,7 @@
 #define ATOM_DEVICE_CONNECTOR_CASE_1                      0x0000000E
 #define ATOM_DEVICE_CONNECTOR_DISPLAYPORT                 0x0000000F
 
+
 #define ATOM_DEVICE_DAC_INFO_MASK                         0x0000000F
 #define ATOM_DEVICE_DAC_INFO_SHIFT                        0x00000000
 #define ATOM_DEVICE_DAC_INFO_NODAC                        0x00000000
@@ -1958,139 +2429,150 @@
 #define ATOM_DEVICE_I2C_ID_SHIFT                          0x00000004
 #define ATOM_DEVICE_I2C_ID_IS_FOR_NON_MM_USE              0x00000001
 #define ATOM_DEVICE_I2C_ID_IS_FOR_MM_USE                  0x00000002
-#define ATOM_DEVICE_I2C_ID_IS_FOR_SDVO_USE                0x00000003	/* For IGP RS600 */
-#define ATOM_DEVICE_I2C_ID_IS_FOR_DAC_SCL                 0x00000004	/* For IGP RS690 */
+#define ATOM_DEVICE_I2C_ID_IS_FOR_SDVO_USE                0x00000003    //For IGP RS600
+#define ATOM_DEVICE_I2C_ID_IS_FOR_DAC_SCL                 0x00000004    //For IGP RS690
 
 #define ATOM_DEVICE_I2C_HARDWARE_CAP_MASK                 0x00000080
 #define ATOM_DEVICE_I2C_HARDWARE_CAP_SHIFT                0x00000007
 #define	ATOM_DEVICE_USES_SOFTWARE_ASSISTED_I2C            0x00000000
 #define	ATOM_DEVICE_USES_HARDWARE_ASSISTED_I2C            0x00000001
 
-/*   usDeviceSupport: */
-/*   Bits0       = 0 - no CRT1 support= 1- CRT1 is supported */
-/*   Bit 1       = 0 - no LCD1 support= 1- LCD1 is supported */
-/*   Bit 2       = 0 - no TV1  support= 1- TV1  is supported */
-/*   Bit 3       = 0 - no DFP1 support= 1- DFP1 is supported */
-/*   Bit 4       = 0 - no CRT2 support= 1- CRT2 is supported */
-/*   Bit 5       = 0 - no LCD2 support= 1- LCD2 is supported */
-/*   Bit 6       = 0 - no TV2  support= 1- TV2  is supported */
-/*   Bit 7       = 0 - no DFP2 support= 1- DFP2 is supported */
-/*   Bit 8       = 0 - no CV   support= 1- CV   is supported */
-/*   Bit 9       = 0 - no DFP3 support= 1- DFP3 is supported */
-/*   Byte1 (Supported Device Info) */
-/*   Bit 0       = = 0 - no CV support= 1- CV is supported */
-/*  */
-/*  */
+//  usDeviceSupport:
+//  Bits0	= 0 - no CRT1 support= 1- CRT1 is supported
+//  Bit 1	= 0 - no LCD1 support= 1- LCD1 is supported
+//  Bit 2	= 0 - no TV1  support= 1- TV1  is supported
+//  Bit 3	= 0 - no DFP1 support= 1- DFP1 is supported
+//  Bit 4	= 0 - no CRT2 support= 1- CRT2 is supported
+//  Bit 5	= 0 - no LCD2 support= 1- LCD2 is supported
+//  Bit 6	= 0 - no DFP6 support= 1- DFP6 is supported
+//  Bit 7	= 0 - no DFP2 support= 1- DFP2 is supported
+//  Bit 8	= 0 - no CV   support= 1- CV   is supported
+//  Bit 9	= 0 - no DFP3 support= 1- DFP3 is supported
+//  Bit 10      = 0 - no DFP4 support= 1- DFP4 is supported
+//  Bit 11      = 0 - no DFP5 support= 1- DFP5 is supported
+//   
+//  
 
-/*               ucI2C_ConfigID */
-/*     [7:0] - I2C LINE Associate ID */
-/*           = 0   - no I2C */
-/*     [7]               -       HW_Cap        = 1,  [6:0]=HW assisted I2C ID(HW line selection) */
-/*                           =   0,  [6:0]=SW assisted I2C ID */
-/*     [6-4]     - HW_ENGINE_ID  =       1,  HW engine for NON multimedia use */
-/*                           =   2,      HW engine for Multimedia use */
-/*                           =   3-7     Reserved for future I2C engines */
-/*               [3-0] - I2C_LINE_MUX  = A Mux number when it's HW assisted I2C or GPIO ID when it's SW I2C */
+/****************************************************************************/
+/* Structure used in MclkSS_InfoTable                                       */
+/****************************************************************************/
+//		ucI2C_ConfigID
+//    [7:0] - I2C LINE Associate ID
+//          = 0   - no I2C
+//    [7]		-	HW_Cap        =	1,  [6:0]=HW assisted I2C ID(HW line selection)
+//                          =	0,  [6:0]=SW assisted I2C ID
+//    [6-4]	- HW_ENGINE_ID  =	1,  HW engine for NON multimedia use
+//                          =	2,	HW engine for Multimedia use
+//                          =	3-7	Reserved for future I2C engines
+//		[3-0] - I2C_LINE_MUX  = A Mux number when it's HW assisted I2C or GPIO ID when it's SW I2C
 
-typedef struct _ATOM_I2C_ID_CONFIG {
+typedef struct _ATOM_I2C_ID_CONFIG
+{
 #if ATOM_BIG_ENDIAN
-	UCHAR bfHW_Capable:1;
-	UCHAR bfHW_EngineID:3;
-	UCHAR bfI2C_LineMux:4;
+  UCHAR   bfHW_Capable:1;
+  UCHAR   bfHW_EngineID:3;
+  UCHAR   bfI2C_LineMux:4;
 #else
-	UCHAR bfI2C_LineMux:4;
-	UCHAR bfHW_EngineID:3;
-	UCHAR bfHW_Capable:1;
+  UCHAR   bfI2C_LineMux:4;
+  UCHAR   bfHW_EngineID:3;
+  UCHAR   bfHW_Capable:1;
 #endif
-} ATOM_I2C_ID_CONFIG;
+}ATOM_I2C_ID_CONFIG;
 
-typedef union _ATOM_I2C_ID_CONFIG_ACCESS {
-	ATOM_I2C_ID_CONFIG sbfAccess;
-	UCHAR ucAccess;
-} ATOM_I2C_ID_CONFIG_ACCESS;
+typedef union _ATOM_I2C_ID_CONFIG_ACCESS
+{
+  ATOM_I2C_ID_CONFIG sbfAccess;
+  UCHAR              ucAccess;
+}ATOM_I2C_ID_CONFIG_ACCESS;
+   
 
-/****************************************************************************/
-/*  Structure used in GPIO_I2C_InfoTable */
-/****************************************************************************/
-typedef struct _ATOM_GPIO_I2C_ASSIGMENT {
-	USHORT usClkMaskRegisterIndex;
-	USHORT usClkEnRegisterIndex;
-	USHORT usClkY_RegisterIndex;
-	USHORT usClkA_RegisterIndex;
-	USHORT usDataMaskRegisterIndex;
-	USHORT usDataEnRegisterIndex;
-	USHORT usDataY_RegisterIndex;
-	USHORT usDataA_RegisterIndex;
-	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
-	UCHAR ucClkMaskShift;
-	UCHAR ucClkEnShift;
-	UCHAR ucClkY_Shift;
-	UCHAR ucClkA_Shift;
-	UCHAR ucDataMaskShift;
-	UCHAR ucDataEnShift;
-	UCHAR ucDataY_Shift;
-	UCHAR ucDataA_Shift;
-	UCHAR ucReserved1;
-	UCHAR ucReserved2;
-} ATOM_GPIO_I2C_ASSIGMENT;
+/****************************************************************************/	
+// Structure used in GPIO_I2C_InfoTable
+/****************************************************************************/	
+typedef struct _ATOM_GPIO_I2C_ASSIGMENT
+{
+  USHORT                    usClkMaskRegisterIndex;
+  USHORT                    usClkEnRegisterIndex;
+  USHORT                    usClkY_RegisterIndex;
+  USHORT                    usClkA_RegisterIndex;
+  USHORT                    usDataMaskRegisterIndex;
+  USHORT                    usDataEnRegisterIndex;
+  USHORT                    usDataY_RegisterIndex;
+  USHORT                    usDataA_RegisterIndex;
+  ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
+  UCHAR                     ucClkMaskShift;
+  UCHAR                     ucClkEnShift;
+  UCHAR                     ucClkY_Shift;
+  UCHAR                     ucClkA_Shift;
+  UCHAR                     ucDataMaskShift;
+  UCHAR                     ucDataEnShift;
+  UCHAR                     ucDataY_Shift;
+  UCHAR                     ucDataA_Shift;
+  UCHAR                     ucReserved1;
+  UCHAR                     ucReserved2;
+}ATOM_GPIO_I2C_ASSIGMENT;
 
-typedef struct _ATOM_GPIO_I2C_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_GPIO_I2C_ASSIGMENT asGPIO_Info[ATOM_MAX_SUPPORTED_DEVICE];
-} ATOM_GPIO_I2C_INFO;
+typedef struct _ATOM_GPIO_I2C_INFO
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  ATOM_GPIO_I2C_ASSIGMENT   asGPIO_Info[ATOM_MAX_SUPPORTED_DEVICE];
+}ATOM_GPIO_I2C_INFO;
 
-/****************************************************************************/
-/*  Common Structure used in other structures */
-/****************************************************************************/
+/****************************************************************************/	
+// Common Structure used in other structures
+/****************************************************************************/	
 
 #ifndef _H2INC
-
-/* Please don't add or expand this bitfield structure below, this one will retire soon.! */
-typedef struct _ATOM_MODE_MISC_INFO {
+  
+//Please don't add or expand this bitfield structure below, this one will retire soon.!
+typedef struct _ATOM_MODE_MISC_INFO
+{ 
 #if ATOM_BIG_ENDIAN
-	USHORT Reserved:6;
-	USHORT RGB888:1;
-	USHORT DoubleClock:1;
-	USHORT Interlace:1;
-	USHORT CompositeSync:1;
-	USHORT V_ReplicationBy2:1;
-	USHORT H_ReplicationBy2:1;
-	USHORT VerticalCutOff:1;
-	USHORT VSyncPolarity:1;	/* 0=Active High, 1=Active Low */
-	USHORT HSyncPolarity:1;	/* 0=Active High, 1=Active Low */
-	USHORT HorizontalCutOff:1;
+  USHORT Reserved:6;
+  USHORT RGB888:1;
+  USHORT DoubleClock:1;
+  USHORT Interlace:1;
+  USHORT CompositeSync:1;
+  USHORT V_ReplicationBy2:1;
+  USHORT H_ReplicationBy2:1;
+  USHORT VerticalCutOff:1;
+  USHORT VSyncPolarity:1;      //0=Active High, 1=Active Low
+  USHORT HSyncPolarity:1;      //0=Active High, 1=Active Low
+  USHORT HorizontalCutOff:1;
 #else
-	USHORT HorizontalCutOff:1;
-	USHORT HSyncPolarity:1;	/* 0=Active High, 1=Active Low */
-	USHORT VSyncPolarity:1;	/* 0=Active High, 1=Active Low */
-	USHORT VerticalCutOff:1;
-	USHORT H_ReplicationBy2:1;
-	USHORT V_ReplicationBy2:1;
-	USHORT CompositeSync:1;
-	USHORT Interlace:1;
-	USHORT DoubleClock:1;
-	USHORT RGB888:1;
-	USHORT Reserved:6;
+  USHORT HorizontalCutOff:1;
+  USHORT HSyncPolarity:1;      //0=Active High, 1=Active Low
+  USHORT VSyncPolarity:1;      //0=Active High, 1=Active Low
+  USHORT VerticalCutOff:1;
+  USHORT H_ReplicationBy2:1;
+  USHORT V_ReplicationBy2:1;
+  USHORT CompositeSync:1;
+  USHORT Interlace:1;
+  USHORT DoubleClock:1;
+  USHORT RGB888:1;
+  USHORT Reserved:6;           
 #endif
-} ATOM_MODE_MISC_INFO;
-
-typedef union _ATOM_MODE_MISC_INFO_ACCESS {
-	ATOM_MODE_MISC_INFO sbfAccess;
-	USHORT usAccess;
-} ATOM_MODE_MISC_INFO_ACCESS;
-
+}ATOM_MODE_MISC_INFO;
+  
+typedef union _ATOM_MODE_MISC_INFO_ACCESS
+{ 
+  ATOM_MODE_MISC_INFO sbfAccess;
+  USHORT              usAccess;
+}ATOM_MODE_MISC_INFO_ACCESS;
+  
 #else
-
-typedef union _ATOM_MODE_MISC_INFO_ACCESS {
-	USHORT usAccess;
-} ATOM_MODE_MISC_INFO_ACCESS;
-
+  
+typedef union _ATOM_MODE_MISC_INFO_ACCESS
+{ 
+  USHORT              usAccess;
+}ATOM_MODE_MISC_INFO_ACCESS;
+   
 #endif
 
-/*  usModeMiscInfo- */
+// usModeMiscInfo-
 #define ATOM_H_CUTOFF           0x01
-#define ATOM_HSYNC_POLARITY     0x02	/* 0=Active High, 1=Active Low */
-#define ATOM_VSYNC_POLARITY     0x04	/* 0=Active High, 1=Active Low */
+#define ATOM_HSYNC_POLARITY     0x02             //0=Active High, 1=Active Low
+#define ATOM_VSYNC_POLARITY     0x04             //0=Active High, 1=Active Low
 #define ATOM_V_CUTOFF           0x08
 #define ATOM_H_REPLICATIONBY2   0x10
 #define ATOM_V_REPLICATIONBY2   0x20
@@ -2099,10 +2581,10 @@
 #define ATOM_DOUBLE_CLOCK_MODE  0x100
 #define ATOM_RGB888_MODE        0x200
 
-/* usRefreshRate- */
+//usRefreshRate-
 #define ATOM_REFRESH_43         43
 #define ATOM_REFRESH_47         47
-#define ATOM_REFRESH_56         56
+#define ATOM_REFRESH_56         56	
 #define ATOM_REFRESH_60         60
 #define ATOM_REFRESH_65         65
 #define ATOM_REFRESH_70         70
@@ -2110,192 +2592,233 @@
 #define ATOM_REFRESH_75         75
 #define ATOM_REFRESH_85         85
 
-/*  ATOM_MODE_TIMING data are exactly the same as VESA timing data. */
-/*  Translation from EDID to ATOM_MODE_TIMING, use the following formula. */
-/*  */
-/*       VESA_HTOTAL                     =       VESA_ACTIVE + 2* VESA_BORDER + VESA_BLANK */
-/*                                               =       EDID_HA + EDID_HBL */
-/*       VESA_HDISP                      =       VESA_ACTIVE     =       EDID_HA */
-/*       VESA_HSYNC_START        =       VESA_ACTIVE + VESA_BORDER + VESA_FRONT_PORCH */
-/*                                               =       EDID_HA + EDID_HSO */
-/*       VESA_HSYNC_WIDTH        =       VESA_HSYNC_TIME =       EDID_HSPW */
-/*       VESA_BORDER                     =       EDID_BORDER */
+// ATOM_MODE_TIMING data are exactly the same as VESA timing data.
+// Translation from EDID to ATOM_MODE_TIMING, use the following formula.
+//
+//	VESA_HTOTAL			=	VESA_ACTIVE + 2* VESA_BORDER + VESA_BLANK
+//						=	EDID_HA + EDID_HBL
+//	VESA_HDISP			=	VESA_ACTIVE	=	EDID_HA
+//	VESA_HSYNC_START	=	VESA_ACTIVE + VESA_BORDER + VESA_FRONT_PORCH
+//						=	EDID_HA + EDID_HSO
+//	VESA_HSYNC_WIDTH	=	VESA_HSYNC_TIME	=	EDID_HSPW
+//	VESA_BORDER			=	EDID_BORDER
 
-/****************************************************************************/
-/*  Structure used in SetCRTC_UsingDTDTimingTable */
-/****************************************************************************/
-typedef struct _SET_CRTC_USING_DTD_TIMING_PARAMETERS {
-	USHORT usH_Size;
-	USHORT usH_Blanking_Time;
-	USHORT usV_Size;
-	USHORT usV_Blanking_Time;
-	USHORT usH_SyncOffset;
-	USHORT usH_SyncWidth;
-	USHORT usV_SyncOffset;
-	USHORT usV_SyncWidth;
-	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
-	UCHAR ucH_Border;	/*  From DFP EDID */
-	UCHAR ucV_Border;
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucPadding[3];
-} SET_CRTC_USING_DTD_TIMING_PARAMETERS;
+/****************************************************************************/	
+// Structure used in SetCRTC_UsingDTDTimingTable
+/****************************************************************************/	
+typedef struct _SET_CRTC_USING_DTD_TIMING_PARAMETERS
+{
+  USHORT  usH_Size;
+  USHORT  usH_Blanking_Time;
+  USHORT  usV_Size;
+  USHORT  usV_Blanking_Time;			
+  USHORT  usH_SyncOffset;
+  USHORT  usH_SyncWidth;
+  USHORT  usV_SyncOffset;
+  USHORT  usV_SyncWidth;
+  ATOM_MODE_MISC_INFO_ACCESS  susModeMiscInfo;  
+  UCHAR   ucH_Border;         // From DFP EDID
+  UCHAR   ucV_Border;
+  UCHAR   ucCRTC;             // ATOM_CRTC1 or ATOM_CRTC2  
+  UCHAR   ucPadding[3];
+}SET_CRTC_USING_DTD_TIMING_PARAMETERS;
 
-/****************************************************************************/
-/*  Structure used in SetCRTC_TimingTable */
-/****************************************************************************/
-typedef struct _SET_CRTC_TIMING_PARAMETERS {
-	USHORT usH_Total;	/*  horizontal total */
-	USHORT usH_Disp;	/*  horizontal display */
-	USHORT usH_SyncStart;	/*  horozontal Sync start */
-	USHORT usH_SyncWidth;	/*  horizontal Sync width */
-	USHORT usV_Total;	/*  vertical total */
-	USHORT usV_Disp;	/*  vertical display */
-	USHORT usV_SyncStart;	/*  vertical Sync start */
-	USHORT usV_SyncWidth;	/*  vertical Sync width */
-	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
-	UCHAR ucCRTC;		/*  ATOM_CRTC1 or ATOM_CRTC2 */
-	UCHAR ucOverscanRight;	/*  right */
-	UCHAR ucOverscanLeft;	/*  left */
-	UCHAR ucOverscanBottom;	/*  bottom */
-	UCHAR ucOverscanTop;	/*  top */
-	UCHAR ucReserved;
-} SET_CRTC_TIMING_PARAMETERS;
+/****************************************************************************/	
+// Structure used in SetCRTC_TimingTable
+/****************************************************************************/	
+typedef struct _SET_CRTC_TIMING_PARAMETERS
+{
+  USHORT                      usH_Total;        // horizontal total
+  USHORT                      usH_Disp;         // horizontal display
+  USHORT                      usH_SyncStart;    // horozontal Sync start
+  USHORT                      usH_SyncWidth;    // horizontal Sync width
+  USHORT                      usV_Total;        // vertical total
+  USHORT                      usV_Disp;         // vertical display
+  USHORT                      usV_SyncStart;    // vertical Sync start
+  USHORT                      usV_SyncWidth;    // vertical Sync width
+  ATOM_MODE_MISC_INFO_ACCESS  susModeMiscInfo;
+  UCHAR                       ucCRTC;           // ATOM_CRTC1 or ATOM_CRTC2
+  UCHAR                       ucOverscanRight;  // right
+  UCHAR                       ucOverscanLeft;   // left
+  UCHAR                       ucOverscanBottom; // bottom
+  UCHAR                       ucOverscanTop;    // top
+  UCHAR                       ucReserved;
+}SET_CRTC_TIMING_PARAMETERS;
 #define SET_CRTC_TIMING_PARAMETERS_PS_ALLOCATION SET_CRTC_TIMING_PARAMETERS
 
-/****************************************************************************/
-/*  Structure used in StandardVESA_TimingTable */
-/*                    AnalogTV_InfoTable */
-/*                    ComponentVideoInfoTable */
-/****************************************************************************/
-typedef struct _ATOM_MODE_TIMING {
-	USHORT usCRTC_H_Total;
-	USHORT usCRTC_H_Disp;
-	USHORT usCRTC_H_SyncStart;
-	USHORT usCRTC_H_SyncWidth;
-	USHORT usCRTC_V_Total;
-	USHORT usCRTC_V_Disp;
-	USHORT usCRTC_V_SyncStart;
-	USHORT usCRTC_V_SyncWidth;
-	USHORT usPixelClock;	/* in 10Khz unit */
-	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
-	USHORT usCRTC_OverscanRight;
-	USHORT usCRTC_OverscanLeft;
-	USHORT usCRTC_OverscanBottom;
-	USHORT usCRTC_OverscanTop;
-	USHORT usReserve;
-	UCHAR ucInternalModeNumber;
-	UCHAR ucRefreshRate;
-} ATOM_MODE_TIMING;
+/****************************************************************************/	
+// Structure used in StandardVESA_TimingTable
+//                   AnalogTV_InfoTable 
+//                   ComponentVideoInfoTable
+/****************************************************************************/	
+typedef struct _ATOM_MODE_TIMING
+{
+  USHORT  usCRTC_H_Total;
+  USHORT  usCRTC_H_Disp;
+  USHORT  usCRTC_H_SyncStart;
+  USHORT  usCRTC_H_SyncWidth;
+  USHORT  usCRTC_V_Total;
+  USHORT  usCRTC_V_Disp;
+  USHORT  usCRTC_V_SyncStart;
+  USHORT  usCRTC_V_SyncWidth;
+  USHORT  usPixelClock;					                 //in 10Khz unit
+  ATOM_MODE_MISC_INFO_ACCESS  susModeMiscInfo;
+  USHORT  usCRTC_OverscanRight;
+  USHORT  usCRTC_OverscanLeft;
+  USHORT  usCRTC_OverscanBottom;
+  USHORT  usCRTC_OverscanTop;
+  USHORT  usReserve;
+  UCHAR   ucInternalModeNumber;
+  UCHAR   ucRefreshRate;
+}ATOM_MODE_TIMING;
 
-typedef struct _ATOM_DTD_FORMAT {
-	USHORT usPixClk;
-	USHORT usHActive;
-	USHORT usHBlanking_Time;
-	USHORT usVActive;
-	USHORT usVBlanking_Time;
-	USHORT usHSyncOffset;
-	USHORT usHSyncWidth;
-	USHORT usVSyncOffset;
-	USHORT usVSyncWidth;
-	USHORT usImageHSize;
-	USHORT usImageVSize;
-	UCHAR ucHBorder;
-	UCHAR ucVBorder;
-	ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
-	UCHAR ucInternalModeNumber;
-	UCHAR ucRefreshRate;
-} ATOM_DTD_FORMAT;
+typedef struct _ATOM_DTD_FORMAT
+{
+  USHORT  usPixClk;
+  USHORT  usHActive;
+  USHORT  usHBlanking_Time;
+  USHORT  usVActive;
+  USHORT  usVBlanking_Time;			
+  USHORT  usHSyncOffset;
+  USHORT  usHSyncWidth;
+  USHORT  usVSyncOffset;
+  USHORT  usVSyncWidth;
+  USHORT  usImageHSize;
+  USHORT  usImageVSize;
+  UCHAR   ucHBorder;
+  UCHAR   ucVBorder;
+  ATOM_MODE_MISC_INFO_ACCESS susModeMiscInfo;
+  UCHAR   ucInternalModeNumber;
+  UCHAR   ucRefreshRate;
+}ATOM_DTD_FORMAT;
 
-/****************************************************************************/
-/*  Structure used in LVDS_InfoTable */
-/*   * Need a document to describe this table */
-/****************************************************************************/
+/****************************************************************************/	
+// Structure used in LVDS_InfoTable 
+//  * Need a document to describe this table
+/****************************************************************************/	
 #define SUPPORTED_LCD_REFRESHRATE_30Hz          0x0004
 #define SUPPORTED_LCD_REFRESHRATE_40Hz          0x0008
 #define SUPPORTED_LCD_REFRESHRATE_50Hz          0x0010
 #define SUPPORTED_LCD_REFRESHRATE_60Hz          0x0020
 
-/* Once DAL sees this CAP is set, it will read EDID from LCD on its own instead of using sLCDTiming in ATOM_LVDS_INFO_V12. */
-/* Other entries in ATOM_LVDS_INFO_V12 are still valid/useful to DAL */
-#define	LCDPANEL_CAP_READ_EDID									0x1
+//ucTableFormatRevision=1
+//ucTableContentRevision=1
+typedef struct _ATOM_LVDS_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_DTD_FORMAT     sLCDTiming;
+  USHORT              usModePatchTableOffset;
+  USHORT              usSupportedRefreshRate;     //Refer to panel info table in ATOMBIOS extension Spec.
+  USHORT              usOffDelayInMs;
+  UCHAR               ucPowerSequenceDigOntoDEin10Ms;
+  UCHAR               ucPowerSequenceDEtoBLOnin10Ms;
+  UCHAR               ucLVDS_Misc;               // Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level}
+                                                 // Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888}
+                                                 // Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled}
+                                                 // Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled}
+  UCHAR               ucPanelDefaultRefreshRate;
+  UCHAR               ucPanelIdentification;
+  UCHAR               ucSS_Id;
+}ATOM_LVDS_INFO;
 
-/* ucTableFormatRevision=1 */
-/* ucTableContentRevision=1 */
-typedef struct _ATOM_LVDS_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_DTD_FORMAT sLCDTiming;
-	USHORT usModePatchTableOffset;
-	USHORT usSupportedRefreshRate;	/* Refer to panel info table in ATOMBIOS extension Spec. */
-	USHORT usOffDelayInMs;
-	UCHAR ucPowerSequenceDigOntoDEin10Ms;
-	UCHAR ucPowerSequenceDEtoBLOnin10Ms;
-	UCHAR ucLVDS_Misc;	/*  Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level} */
-	/*  Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888} */
-	/*  Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled} */
-	/*  Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled} */
-	UCHAR ucPanelDefaultRefreshRate;
-	UCHAR ucPanelIdentification;
-	UCHAR ucSS_Id;
-} ATOM_LVDS_INFO;
+//ucTableFormatRevision=1
+//ucTableContentRevision=2
+typedef struct _ATOM_LVDS_INFO_V12
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_DTD_FORMAT     sLCDTiming;
+  USHORT              usExtInfoTableOffset;
+  USHORT              usSupportedRefreshRate;     //Refer to panel info table in ATOMBIOS extension Spec.
+  USHORT              usOffDelayInMs;
+  UCHAR               ucPowerSequenceDigOntoDEin10Ms;
+  UCHAR               ucPowerSequenceDEtoBLOnin10Ms;
+  UCHAR               ucLVDS_Misc;               // Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level}
+                                                 // Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888}
+                                                 // Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled}
+                                                 // Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled}
+  UCHAR               ucPanelDefaultRefreshRate;
+  UCHAR               ucPanelIdentification;
+  UCHAR               ucSS_Id;
+  USHORT              usLCDVenderID;
+  USHORT              usLCDProductID;
+  UCHAR               ucLCDPanel_SpecialHandlingCap; 
+	UCHAR								ucPanelInfoSize;					//  start from ATOM_DTD_FORMAT to end of panel info, include ExtInfoTable
+  UCHAR               ucReserved[2];
+}ATOM_LVDS_INFO_V12;
 
-/* ucTableFormatRevision=1 */
-/* ucTableContentRevision=2 */
-typedef struct _ATOM_LVDS_INFO_V12 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_DTD_FORMAT sLCDTiming;
-	USHORT usExtInfoTableOffset;
-	USHORT usSupportedRefreshRate;	/* Refer to panel info table in ATOMBIOS extension Spec. */
-	USHORT usOffDelayInMs;
-	UCHAR ucPowerSequenceDigOntoDEin10Ms;
-	UCHAR ucPowerSequenceDEtoBLOnin10Ms;
-	UCHAR ucLVDS_Misc;	/*  Bit0:{=0:single, =1:dual},Bit1 {=0:666RGB, =1:888RGB},Bit2:3:{Grey level} */
-	/*  Bit4:{=0:LDI format for RGB888, =1 FPDI format for RGB888} */
-	/*  Bit5:{=0:Spatial Dithering disabled;1 Spatial Dithering enabled} */
-	/*  Bit6:{=0:Temporal Dithering disabled;1 Temporal Dithering enabled} */
-	UCHAR ucPanelDefaultRefreshRate;
-	UCHAR ucPanelIdentification;
-	UCHAR ucSS_Id;
-	USHORT usLCDVenderID;
-	USHORT usLCDProductID;
-	UCHAR ucLCDPanel_SpecialHandlingCap;
-	UCHAR ucPanelInfoSize;	/*   start from ATOM_DTD_FORMAT to end of panel info, include ExtInfoTable */
-	UCHAR ucReserved[2];
-} ATOM_LVDS_INFO_V12;
+//Definitions for ucLCDPanel_SpecialHandlingCap:
+
+//Once DAL sees this CAP is set, it will read EDID from LCD on its own instead of using sLCDTiming in ATOM_LVDS_INFO_V12. 
+//Other entries in ATOM_LVDS_INFO_V12 are still valid/useful to DAL 
+#define	LCDPANEL_CAP_READ_EDID                  0x1
+
+//If a design supports DRR (dynamic refresh rate) on internal panels (LVDS or EDP), this cap is set in ucLCDPanel_SpecialHandlingCap together
+//with multiple supported refresh rates@usSupportedRefreshRate. This cap should not be set when only slow refresh rate is supported (static
+//refresh rate switch by SW. This is only valid from ATOM_LVDS_INFO_V12
+#define	LCDPANEL_CAP_DRR_SUPPORTED              0x2
+
+//Use this cap bit for a quick reference whether an embadded panel (LCD1 ) is LVDS or eDP.
+#define	LCDPANEL_CAP_eDP                        0x4
+
+
+//Color Bit Depth definition in EDID V1.4 @BYTE 14h
+//Bit 6  5  4
+                              //      0  0  0  -  Color bit depth is undefined
+                              //      0  0  1  -  6 Bits per Primary Color
+                              //      0  1  0  -  8 Bits per Primary Color
+                              //      0  1  1  - 10 Bits per Primary Color
+                              //      1  0  0  - 12 Bits per Primary Color
+                              //      1  0  1  - 14 Bits per Primary Color
+                              //      1  1  0  - 16 Bits per Primary Color
+                              //      1  1  1  - Reserved
+
+#define PANEL_COLOR_BIT_DEPTH_MASK    0x70
+
+// Bit7:{=0:Random Dithering disabled;1 Random Dithering enabled}   
+#define PANEL_RANDOM_DITHER   0x80
+#define PANEL_RANDOM_DITHER_MASK   0x80
+
 
 #define ATOM_LVDS_INFO_LAST  ATOM_LVDS_INFO_V12
 
-typedef struct _ATOM_PATCH_RECORD_MODE {
-	UCHAR ucRecordType;
-	USHORT usHDisp;
-	USHORT usVDisp;
-} ATOM_PATCH_RECORD_MODE;
+typedef struct  _ATOM_PATCH_RECORD_MODE
+{
+  UCHAR     ucRecordType;
+  USHORT    usHDisp;
+  USHORT    usVDisp;
+}ATOM_PATCH_RECORD_MODE;
 
-typedef struct _ATOM_LCD_RTS_RECORD {
-	UCHAR ucRecordType;
-	UCHAR ucRTSValue;
-} ATOM_LCD_RTS_RECORD;
+typedef struct  _ATOM_LCD_RTS_RECORD
+{
+  UCHAR     ucRecordType;
+  UCHAR     ucRTSValue;
+}ATOM_LCD_RTS_RECORD;
 
-/* !! If the record below exits, it shoud always be the first record for easy use in command table!!! */
-typedef struct _ATOM_LCD_MODE_CONTROL_CAP {
-	UCHAR ucRecordType;
-	USHORT usLCDCap;
-} ATOM_LCD_MODE_CONTROL_CAP;
+//!! If the record below exits, it shoud always be the first record for easy use in command table!!! 
+// The record below is only used when LVDS_Info is present. From ATOM_LVDS_INFO_V12, use ucLCDPanel_SpecialHandlingCap instead.
+typedef struct  _ATOM_LCD_MODE_CONTROL_CAP
+{
+  UCHAR     ucRecordType;
+  USHORT    usLCDCap;
+}ATOM_LCD_MODE_CONTROL_CAP;
 
 #define LCD_MODE_CAP_BL_OFF                   1
 #define LCD_MODE_CAP_CRTC_OFF                 2
 #define LCD_MODE_CAP_PANEL_OFF                4
 
-typedef struct _ATOM_FAKE_EDID_PATCH_RECORD {
-	UCHAR ucRecordType;
-	UCHAR ucFakeEDIDLength;
-	UCHAR ucFakeEDIDString[1];	/*  This actually has ucFakeEdidLength elements. */
+typedef struct _ATOM_FAKE_EDID_PATCH_RECORD
+{
+  UCHAR ucRecordType;
+  UCHAR ucFakeEDIDLength;
+  UCHAR ucFakeEDIDString[1];    // This actually has ucFakeEdidLength elements.
 } ATOM_FAKE_EDID_PATCH_RECORD;
 
-typedef struct _ATOM_PANEL_RESOLUTION_PATCH_RECORD {
-	UCHAR ucRecordType;
-	USHORT usHSize;
-	USHORT usVSize;
-} ATOM_PANEL_RESOLUTION_PATCH_RECORD;
+typedef struct  _ATOM_PANEL_RESOLUTION_PATCH_RECORD
+{
+   UCHAR    ucRecordType;
+   USHORT		usHSize;
+   USHORT		usVSize;
+}ATOM_PANEL_RESOLUTION_PATCH_RECORD;
 
 #define LCD_MODE_PATCH_RECORD_MODE_TYPE       1
 #define LCD_RTS_RECORD_TYPE                   2
@@ -2306,21 +2829,25 @@
 
 /****************************Spread Spectrum Info Table Definitions **********************/
 
-/* ucTableFormatRevision=1 */
-/* ucTableContentRevision=2 */
-typedef struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT {
-	USHORT usSpreadSpectrumPercentage;
-	UCHAR ucSpreadSpectrumType;	/* Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Others:TBD */
-	UCHAR ucSS_Step;
-	UCHAR ucSS_Delay;
-	UCHAR ucSS_Id;
-	UCHAR ucRecommendedRef_Div;
-	UCHAR ucSS_Range;	/* it was reserved for V11 */
-} ATOM_SPREAD_SPECTRUM_ASSIGNMENT;
+//ucTableFormatRevision=1
+//ucTableContentRevision=2
+typedef struct _ATOM_SPREAD_SPECTRUM_ASSIGNMENT
+{
+  USHORT              usSpreadSpectrumPercentage; 
+  UCHAR               ucSpreadSpectrumType;	    //Bit1=0 Down Spread,=1 Center Spread. Bit1=1 Ext. =0 Int. Bit2=1: PCIE REFCLK SS =0 iternal PPLL SS  Others:TBD
+  UCHAR               ucSS_Step;
+  UCHAR               ucSS_Delay;
+  UCHAR               ucSS_Id;
+  UCHAR               ucRecommendedRef_Div;
+  UCHAR               ucSS_Range;               //it was reserved for V11
+}ATOM_SPREAD_SPECTRUM_ASSIGNMENT;
 
 #define ATOM_MAX_SS_ENTRY                      16
-#define ATOM_DP_SS_ID1												 0x0f1	/*  SS modulation freq=30k */
-#define ATOM_DP_SS_ID2												 0x0f2	/*  SS modulation freq=33k */
+#define ATOM_DP_SS_ID1												 0x0f1			// SS ID for internal DP stream at 2.7Ghz. if ATOM_DP_SS_ID2 does not exist in SS_InfoTable, it is used for internal DP stream at 1.62Ghz as well. 
+#define ATOM_DP_SS_ID2												 0x0f2			// SS ID for internal DP stream at 1.62Ghz, if it exists in SS_InfoTable. 
+#define ATOM_LVLINK_2700MHz_SS_ID              0x0f3      // SS ID for LV link translator chip at 2.7Ghz
+#define ATOM_LVLINK_1620MHz_SS_ID              0x0f4      // SS ID for LV link translator chip at 1.62Ghz
+
 
 #define ATOM_SS_DOWN_SPREAD_MODE_MASK          0x00000000
 #define ATOM_SS_DOWN_SPREAD_MODE               0x00000000
@@ -2329,29 +2856,30 @@
 #define ATOM_INTERNAL_SS_MASK                  0x00000000
 #define ATOM_EXTERNAL_SS_MASK                  0x00000002
 #define EXEC_SS_STEP_SIZE_SHIFT                2
-#define EXEC_SS_DELAY_SHIFT                    4
+#define EXEC_SS_DELAY_SHIFT                    4    
 #define ACTIVEDATA_TO_BLON_DELAY_SHIFT         4
 
-typedef struct _ATOM_SPREAD_SPECTRUM_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_SPREAD_SPECTRUM_ASSIGNMENT asSS_Info[ATOM_MAX_SS_ENTRY];
-} ATOM_SPREAD_SPECTRUM_INFO;
+typedef struct _ATOM_SPREAD_SPECTRUM_INFO
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  ATOM_SPREAD_SPECTRUM_ASSIGNMENT   asSS_Info[ATOM_MAX_SS_ENTRY];
+}ATOM_SPREAD_SPECTRUM_INFO;
 
-/****************************************************************************/
-/*  Structure used in AnalogTV_InfoTable (Top level) */
-/****************************************************************************/
-/* ucTVBootUpDefaultStd definiton: */
+/****************************************************************************/	
+// Structure used in AnalogTV_InfoTable (Top level)
+/****************************************************************************/	
+//ucTVBootUpDefaultStd definiton:
 
-/* ATOM_TV_NTSC                1 */
-/* ATOM_TV_NTSCJ               2 */
-/* ATOM_TV_PAL                 3 */
-/* ATOM_TV_PALM                4 */
-/* ATOM_TV_PALCN               5 */
-/* ATOM_TV_PALN                6 */
-/* ATOM_TV_PAL60               7 */
-/* ATOM_TV_SECAM               8 */
+//ATOM_TV_NTSC                1
+//ATOM_TV_NTSCJ               2
+//ATOM_TV_PAL                 3
+//ATOM_TV_PALM                4
+//ATOM_TV_PALCN               5
+//ATOM_TV_PALN                6
+//ATOM_TV_PAL60               7
+//ATOM_TV_SECAM               8
 
-/* ucTVSuppportedStd definition: */
+//ucTVSupportedStd definition:
 #define NTSC_SUPPORT          0x1
 #define NTSCJ_SUPPORT         0x2
 
@@ -2364,46 +2892,58 @@
 
 #define MAX_SUPPORTED_TV_TIMING    2
 
-typedef struct _ATOM_ANALOG_TV_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucTV_SupportedStandard;
-	UCHAR ucTV_BootUpDefaultStandard;
-	UCHAR ucExt_TV_ASIC_ID;
-	UCHAR ucExt_TV_ASIC_SlaveAddr;
-	/*ATOM_DTD_FORMAT          aModeTimings[MAX_SUPPORTED_TV_TIMING]; */
-	ATOM_MODE_TIMING aModeTimings[MAX_SUPPORTED_TV_TIMING];
-} ATOM_ANALOG_TV_INFO;
+typedef struct _ATOM_ANALOG_TV_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  UCHAR                    ucTV_SupportedStandard;
+  UCHAR                    ucTV_BootUpDefaultStandard; 
+  UCHAR                    ucExt_TV_ASIC_ID;
+  UCHAR                    ucExt_TV_ASIC_SlaveAddr;
+  /*ATOM_DTD_FORMAT          aModeTimings[MAX_SUPPORTED_TV_TIMING];*/
+  ATOM_MODE_TIMING         aModeTimings[MAX_SUPPORTED_TV_TIMING];
+}ATOM_ANALOG_TV_INFO;
 
 #define MAX_SUPPORTED_TV_TIMING_V1_2    3
 
-typedef struct _ATOM_ANALOG_TV_INFO_V1_2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR                    ucTV_SupportedStandard;
-	UCHAR                    ucTV_BootUpDefaultStandard;
-	UCHAR                    ucExt_TV_ASIC_ID;
-	UCHAR                    ucExt_TV_ASIC_SlaveAddr;
-	ATOM_DTD_FORMAT          aModeTimings[MAX_SUPPORTED_TV_TIMING];
-} ATOM_ANALOG_TV_INFO_V1_2;
+typedef struct _ATOM_ANALOG_TV_INFO_V1_2
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  UCHAR                    ucTV_SupportedStandard;
+  UCHAR                    ucTV_BootUpDefaultStandard; 
+  UCHAR                    ucExt_TV_ASIC_ID;
+  UCHAR                    ucExt_TV_ASIC_SlaveAddr;
+  ATOM_DTD_FORMAT          aModeTimings[MAX_SUPPORTED_TV_TIMING];
+}ATOM_ANALOG_TV_INFO_V1_2;
+
+typedef struct _ATOM_DPCD_INFO
+{
+  UCHAR   ucRevisionNumber;        //10h : Revision 1.0; 11h : Revision 1.1   
+  UCHAR   ucMaxLinkRate;           //06h : 1.62Gbps per lane; 0Ah = 2.7Gbps per lane
+  UCHAR   ucMaxLane;               //Bits 4:0 = MAX_LANE_COUNT (1/2/4). Bit 7 = ENHANCED_FRAME_CAP 
+  UCHAR   ucMaxDownSpread;         //Bit0 = 0: No Down spread; Bit0 = 1: 0.5% (Subject to change according to DP spec)
+}ATOM_DPCD_INFO;
+
+#define ATOM_DPCD_MAX_LANE_MASK    0x1F
 
 /**************************************************************************/
-/*  VRAM usage and their definitions */
+// VRAM usage and their defintions
 
-/*  One chunk of VRAM used by Bios are for HWICON surfaces,EDID data. */
-/*  Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below. */
-/*  All the addresses below are the offsets from the frame buffer start.They all MUST be Dword aligned! */
-/*  To driver: The physical address of this memory portion=mmFB_START(4K aligned)+ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR */
-/*  To Bios:  ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR->MM_INDEX */
+// One chunk of VRAM used by Bios are for HWICON surfaces,EDID data.
+// Current Mode timing and Dail Timing and/or STD timing data EACH device. They can be broken down as below.
+// All the addresses below are the offsets from the frame buffer start.They all MUST be Dword aligned!
+// To driver: The physical address of this memory portion=mmFB_START(4K aligned)+ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR
+// To Bios:  ATOMBIOS_VRAM_USAGE_START_ADDR+ATOM_x_ADDR->MM_INDEX 
 
 #ifndef VESA_MEMORY_IN_64K_BLOCK
-#define VESA_MEMORY_IN_64K_BLOCK        0x100	/* 256*64K=16Mb (Max. VESA memory is 16Mb!) */
+#define VESA_MEMORY_IN_64K_BLOCK        0x100       //256*64K=16Mb (Max. VESA memory is 16Mb!)
 #endif
 
-#define ATOM_EDID_RAW_DATASIZE          256	/* In Bytes */
-#define ATOM_HWICON_SURFACE_SIZE        4096	/* In Bytes */
+#define ATOM_EDID_RAW_DATASIZE          256         //In Bytes
+#define ATOM_HWICON_SURFACE_SIZE        4096        //In Bytes
 #define ATOM_HWICON_INFOTABLE_SIZE      32
 #define MAX_DTD_MODE_IN_VRAM            6
-#define ATOM_DTD_MODE_SUPPORT_TBL_SIZE  (MAX_DTD_MODE_IN_VRAM*28)	/* 28= (SIZEOF ATOM_DTD_FORMAT) */
-#define ATOM_STD_MODE_SUPPORT_TBL_SIZE  (32*8)	/* 32 is a predefined number,8= (SIZEOF ATOM_STD_FORMAT) */
+#define ATOM_DTD_MODE_SUPPORT_TBL_SIZE  (MAX_DTD_MODE_IN_VRAM*28)    //28= (SIZEOF ATOM_DTD_FORMAT) 
+#define ATOM_STD_MODE_SUPPORT_TBL_SIZE  32*8                         //32 is a predefined number,8= (SIZEOF ATOM_STD_FORMAT)
 #define DFP_ENCODER_TYPE_OFFSET					0x80
 #define DP_ENCODER_LANE_NUM_OFFSET			0x84
 #define DP_ENCODER_LINK_RATE_OFFSET			0x88
@@ -2417,7 +2957,7 @@
 
 #define ATOM_LCD1_EDID_ADDR             (ATOM_CRT1_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
 #define ATOM_LCD1_DTD_MODE_TBL_ADDR     (ATOM_LCD1_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
-#define ATOM_LCD1_STD_MODE_TBL_ADDR	(ATOM_LCD1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_LCD1_STD_MODE_TBL_ADDR   	(ATOM_LCD1_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
 
 #define ATOM_TV1_DTD_MODE_TBL_ADDR      (ATOM_LCD1_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
 
@@ -2431,13 +2971,13 @@
 
 #define ATOM_LCD2_EDID_ADDR             (ATOM_CRT2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
 #define ATOM_LCD2_DTD_MODE_TBL_ADDR     (ATOM_LCD2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
-#define ATOM_LCD2_STD_MODE_TBL_ADDR	(ATOM_LCD2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_LCD2_STD_MODE_TBL_ADDR   	(ATOM_LCD2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
 
-#define ATOM_TV2_EDID_ADDR              (ATOM_LCD2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
-#define ATOM_TV2_DTD_MODE_TBL_ADDR      (ATOM_TV2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
-#define ATOM_TV2_STD_MODE_TBL_ADDR	  (ATOM_TV2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP6_EDID_ADDR             (ATOM_LCD2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP6_DTD_MODE_TBL_ADDR     (ATOM_DFP6_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
+#define ATOM_DFP6_STD_MODE_TBL_ADDR     (ATOM_DFP6_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
 
-#define ATOM_DFP2_EDID_ADDR             (ATOM_TV2_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DFP2_EDID_ADDR             (ATOM_DFP6_STD_MODE_TBL_ADDR + ATOM_STD_MODE_SUPPORT_TBL_SIZE)
 #define ATOM_DFP2_DTD_MODE_TBL_ADDR     (ATOM_DFP2_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
 #define ATOM_DFP2_STD_MODE_TBL_ADDR     (ATOM_DFP2_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
 
@@ -2457,533 +2997,850 @@
 #define ATOM_DFP5_DTD_MODE_TBL_ADDR     (ATOM_DFP5_EDID_ADDR + ATOM_EDID_RAW_DATASIZE)
 #define ATOM_DFP5_STD_MODE_TBL_ADDR     (ATOM_DFP5_DTD_MODE_TBL_ADDR + ATOM_DTD_MODE_SUPPORT_TBL_SIZE)
 
-#define ATOM_DP_TRAINING_TBL_ADDR	(ATOM_DFP5_STD_MODE_TBL_ADDR+ATOM_STD_MODE_SUPPORT_TBL_SIZE)
+#define ATOM_DP_TRAINING_TBL_ADDR				(ATOM_DFP5_STD_MODE_TBL_ADDR+ATOM_STD_MODE_SUPPORT_TBL_SIZE)       
 
-#define ATOM_STACK_STORAGE_START        (ATOM_DP_TRAINING_TBL_ADDR + 256)
-#define ATOM_STACK_STORAGE_END          (ATOM_STACK_STORAGE_START + 512)
+#define ATOM_STACK_STORAGE_START        (ATOM_DP_TRAINING_TBL_ADDR+256)       
+#define ATOM_STACK_STORAGE_END          ATOM_STACK_STORAGE_START+512        
 
-/* The size below is in Kb! */
+//The size below is in Kb!
 #define ATOM_VRAM_RESERVE_SIZE         ((((ATOM_STACK_STORAGE_END - ATOM_HWICON1_SURFACE_ADDR)>>10)+4)&0xFFFC)
-
+   
 #define	ATOM_VRAM_OPERATION_FLAGS_MASK         0xC0000000L
 #define ATOM_VRAM_OPERATION_FLAGS_SHIFT        30
 #define	ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION   0x1
 #define	ATOM_VRAM_BLOCK_NEEDS_RESERVATION      0x0
 
-/***********************************************************************************/
-/*  Structure used in VRAM_UsageByFirmwareTable */
-/*  Note1: This table is filled by SetBiosReservationStartInFB in CoreCommSubs.asm */
-/*         at running time. */
-/*  note2: From RV770, the memory is more than 32bit addressable, so we will change */
-/*         ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains */
-/*         exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware */
-/*         (in offset to start of memory address) is KB aligned instead of byte aligend. */
-/***********************************************************************************/
+/***********************************************************************************/	
+// Structure used in VRAM_UsageByFirmwareTable
+// Note1: This table is filled by SetBiosReservationStartInFB in CoreCommSubs.asm
+//        at running time.   
+// note2: From RV770, the memory is more than 32bit addressable, so we will change 
+//        ucTableFormatRevision=1,ucTableContentRevision=4, the strcuture remains 
+//        exactly same as 1.1 and 1.2 (1.3 is never in use), but ulStartAddrUsedByFirmware 
+//        (in offset to start of memory address) is KB aligned instead of byte aligend.
+/***********************************************************************************/	
+// Note3:
+/* If we change usReserved to "usFBUsedbyDrvInKB", then to VBIOS this usFBUsedbyDrvInKB is a predefined, unchanged constant across VGA or non VGA adapter,
+for CAIL, The size of FB access area is known, only thing missing is the Offset of FB Access area, so we can  have:
+
+If (ulStartAddrUsedByFirmware!=0)
+FBAccessAreaOffset= ulStartAddrUsedByFirmware - usFBUsedbyDrvInKB;
+Reserved area has been claimed by VBIOS including this FB access area; CAIL doesn't need to reserve any extra area for this purpose
+else	//Non VGA case
+ if (FB_Size<=2Gb)
+    FBAccessAreaOffset= FB_Size - usFBUsedbyDrvInKB;
+ else
+	  FBAccessAreaOffset= Aper_Size - usFBUsedbyDrvInKB
+
+CAIL needs to claim an reserved area defined by FBAccessAreaOffset and usFBUsedbyDrvInKB in non VGA case.*/
+
 #define ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO			1
 
-typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO {
-	ULONG ulStartAddrUsedByFirmware;
-	USHORT usFirmwareUseInKb;
-	USHORT usReserved;
-} ATOM_FIRMWARE_VRAM_RESERVE_INFO;
+typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO
+{
+  ULONG   ulStartAddrUsedByFirmware;
+  USHORT  usFirmwareUseInKb;
+  USHORT  usReserved;
+}ATOM_FIRMWARE_VRAM_RESERVE_INFO;
 
-typedef struct _ATOM_VRAM_USAGE_BY_FIRMWARE {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_FIRMWARE_VRAM_RESERVE_INFO
-	    asFirmwareVramReserveInfo[ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO];
-} ATOM_VRAM_USAGE_BY_FIRMWARE;
+typedef struct _ATOM_VRAM_USAGE_BY_FIRMWARE
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_FIRMWARE_VRAM_RESERVE_INFO	asFirmwareVramReserveInfo[ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO];
+}ATOM_VRAM_USAGE_BY_FIRMWARE;
 
-/****************************************************************************/
-/*  Structure used in GPIO_Pin_LUTTable */
-/****************************************************************************/
-typedef struct _ATOM_GPIO_PIN_ASSIGNMENT {
-	USHORT usGpioPin_AIndex;
-	UCHAR ucGpioPinBitShift;
-	UCHAR ucGPIO_ID;
-} ATOM_GPIO_PIN_ASSIGNMENT;
+// change verion to 1.5, when allow driver to allocate the vram area for command table access. 
+typedef struct _ATOM_FIRMWARE_VRAM_RESERVE_INFO_V1_5
+{
+  ULONG   ulStartAddrUsedByFirmware;
+  USHORT  usFirmwareUseInKb;
+  USHORT  usFBUsedByDrvInKb;
+}ATOM_FIRMWARE_VRAM_RESERVE_INFO_V1_5;
 
-typedef struct _ATOM_GPIO_PIN_LUT {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_GPIO_PIN_ASSIGNMENT asGPIO_Pin[1];
-} ATOM_GPIO_PIN_LUT;
+typedef struct _ATOM_VRAM_USAGE_BY_FIRMWARE_V1_5
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_FIRMWARE_VRAM_RESERVE_INFO_V1_5	asFirmwareVramReserveInfo[ATOM_MAX_FIRMWARE_VRAM_USAGE_INFO];
+}ATOM_VRAM_USAGE_BY_FIRMWARE_V1_5;
 
-/****************************************************************************/
-/*  Structure used in ComponentVideoInfoTable */
-/****************************************************************************/
+/****************************************************************************/	
+// Structure used in GPIO_Pin_LUTTable
+/****************************************************************************/	
+typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
+{
+  USHORT                   usGpioPin_AIndex;
+  UCHAR                    ucGpioPinBitShift;
+  UCHAR                    ucGPIO_ID;
+}ATOM_GPIO_PIN_ASSIGNMENT;
+
+typedef struct _ATOM_GPIO_PIN_LUT
+{
+  ATOM_COMMON_TABLE_HEADER  sHeader;
+  ATOM_GPIO_PIN_ASSIGNMENT	asGPIO_Pin[1];
+}ATOM_GPIO_PIN_LUT;
+
+/****************************************************************************/	
+// Structure used in ComponentVideoInfoTable	
+/****************************************************************************/	
 #define GPIO_PIN_ACTIVE_HIGH          0x1
 
 #define MAX_SUPPORTED_CV_STANDARDS    5
 
-/*  definitions for ATOM_D_INFO.ucSettings */
-#define ATOM_GPIO_SETTINGS_BITSHIFT_MASK  0x1F	/*  [4:0] */
-#define ATOM_GPIO_SETTINGS_RESERVED_MASK  0x60	/*  [6:5] = must be zeroed out */
-#define ATOM_GPIO_SETTINGS_ACTIVE_MASK    0x80	/*  [7] */
+// definitions for ATOM_D_INFO.ucSettings
+#define ATOM_GPIO_SETTINGS_BITSHIFT_MASK  0x1F    // [4:0]
+#define ATOM_GPIO_SETTINGS_RESERVED_MASK  0x60    // [6:5] = must be zeroed out
+#define ATOM_GPIO_SETTINGS_ACTIVE_MASK    0x80    // [7]
 
-typedef struct _ATOM_GPIO_INFO {
-	USHORT usAOffset;
-	UCHAR ucSettings;
-	UCHAR ucReserved;
-} ATOM_GPIO_INFO;
+typedef struct _ATOM_GPIO_INFO
+{
+  USHORT  usAOffset;
+  UCHAR   ucSettings;
+  UCHAR   ucReserved;
+}ATOM_GPIO_INFO;
 
-/*  definitions for ATOM_COMPONENT_VIDEO_INFO.ucMiscInfo (bit vector) */
+// definitions for ATOM_COMPONENT_VIDEO_INFO.ucMiscInfo (bit vector)
 #define ATOM_CV_RESTRICT_FORMAT_SELECTION           0x2
 
-/*  definitions for ATOM_COMPONENT_VIDEO_INFO.uc480i/uc480p/uc720p/uc1080i */
-#define ATOM_GPIO_DEFAULT_MODE_EN                   0x80	/* [7]; */
-#define ATOM_GPIO_SETTING_PERMODE_MASK              0x7F	/* [6:0] */
+// definitions for ATOM_COMPONENT_VIDEO_INFO.uc480i/uc480p/uc720p/uc1080i
+#define ATOM_GPIO_DEFAULT_MODE_EN                   0x80 //[7];
+#define ATOM_GPIO_SETTING_PERMODE_MASK              0x7F //[6:0]
 
-/*  definitions for ATOM_COMPONENT_VIDEO_INFO.ucLetterBoxMode */
-/* Line 3 out put 5V. */
-#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_A       0x01	/* represent gpio 3 state for 16:9 */
-#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_B       0x02	/* represent gpio 4 state for 16:9 */
-#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_SHIFT   0x0
+// definitions for ATOM_COMPONENT_VIDEO_INFO.ucLetterBoxMode
+//Line 3 out put 5V.
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_A       0x01     //represent gpio 3 state for 16:9
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_B       0x02     //represent gpio 4 state for 16:9
+#define ATOM_CV_LINE3_ASPECTRATIO_16_9_GPIO_SHIFT   0x0   
 
-/* Line 3 out put 2.2V */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_A 0x04	/* represent gpio 3 state for 4:3 Letter box */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_B 0x08	/* represent gpio 4 state for 4:3 Letter box */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_SHIFT 0x2
+//Line 3 out put 2.2V              
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_A 0x04     //represent gpio 3 state for 4:3 Letter box
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_B 0x08     //represent gpio 4 state for 4:3 Letter box
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_LETBOX_GPIO_SHIFT 0x2     
 
-/* Line 3 out put 0V */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_A        0x10	/* represent gpio 3 state for 4:3 */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_B        0x20	/* represent gpio 4 state for 4:3 */
-#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_SHIFT    0x4
+//Line 3 out put 0V
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_A        0x10     //represent gpio 3 state for 4:3
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_B        0x20     //represent gpio 4 state for 4:3
+#define ATOM_CV_LINE3_ASPECTRATIO_4_3_GPIO_SHIFT    0x4 
 
-#define ATOM_CV_LINE3_ASPECTRATIO_MASK              0x3F	/*  bit [5:0] */
+#define ATOM_CV_LINE3_ASPECTRATIO_MASK              0x3F     // bit [5:0]
 
-#define ATOM_CV_LINE3_ASPECTRATIO_EXIST             0x80	/* bit 7 */
+#define ATOM_CV_LINE3_ASPECTRATIO_EXIST             0x80     //bit 7
 
-/* GPIO bit index in gpio setting per mode value, also represend the block no. in gpio blocks. */
-#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_A   3	/* bit 3 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode. */
-#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_B   4	/* bit 4 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode. */
+//GPIO bit index in gpio setting per mode value, also represend the block no. in gpio blocks.
+#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_A   3   //bit 3 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode.
+#define ATOM_GPIO_INDEX_LINE3_ASPECRATIO_GPIO_B   4   //bit 4 in uc480i/uc480p/uc720p/uc1080i, which represend the default gpio bit setting for the mode.
 
-typedef struct _ATOM_COMPONENT_VIDEO_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usMask_PinRegisterIndex;
-	USHORT usEN_PinRegisterIndex;
-	USHORT usY_PinRegisterIndex;
-	USHORT usA_PinRegisterIndex;
-	UCHAR ucBitShift;
-	UCHAR ucPinActiveState;	/* ucPinActiveState: Bit0=1 active high, =0 active low */
-	ATOM_DTD_FORMAT sReserved;	/*  must be zeroed out */
-	UCHAR ucMiscInfo;
-	UCHAR uc480i;
-	UCHAR uc480p;
-	UCHAR uc720p;
-	UCHAR uc1080i;
-	UCHAR ucLetterBoxMode;
-	UCHAR ucReserved[3];
-	UCHAR ucNumOfWbGpioBlocks;	/* For Component video D-Connector support. If zere, NTSC type connector */
-	ATOM_GPIO_INFO aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
-	ATOM_DTD_FORMAT aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
-} ATOM_COMPONENT_VIDEO_INFO;
 
-/* ucTableFormatRevision=2 */
-/* ucTableContentRevision=1 */
-typedef struct _ATOM_COMPONENT_VIDEO_INFO_V21 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucMiscInfo;
-	UCHAR uc480i;
-	UCHAR uc480p;
-	UCHAR uc720p;
-	UCHAR uc1080i;
-	UCHAR ucReserved;
-	UCHAR ucLetterBoxMode;
-	UCHAR ucNumOfWbGpioBlocks;	/* For Component video D-Connector support. If zere, NTSC type connector */
-	ATOM_GPIO_INFO aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
-	ATOM_DTD_FORMAT aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
-} ATOM_COMPONENT_VIDEO_INFO_V21;
+typedef struct _ATOM_COMPONENT_VIDEO_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;
+  USHORT             usMask_PinRegisterIndex;
+  USHORT             usEN_PinRegisterIndex;
+  USHORT             usY_PinRegisterIndex;
+  USHORT             usA_PinRegisterIndex;
+  UCHAR              ucBitShift;
+  UCHAR              ucPinActiveState;  //ucPinActiveState: Bit0=1 active high, =0 active low
+  ATOM_DTD_FORMAT    sReserved;         // must be zeroed out
+  UCHAR              ucMiscInfo;
+  UCHAR              uc480i;
+  UCHAR              uc480p;
+  UCHAR              uc720p;
+  UCHAR              uc1080i;
+  UCHAR              ucLetterBoxMode;
+  UCHAR              ucReserved[3];
+  UCHAR              ucNumOfWbGpioBlocks; //For Component video D-Connector support. If zere, NTSC type connector
+  ATOM_GPIO_INFO     aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
+  ATOM_DTD_FORMAT    aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
+}ATOM_COMPONENT_VIDEO_INFO;
+
+//ucTableFormatRevision=2
+//ucTableContentRevision=1
+typedef struct _ATOM_COMPONENT_VIDEO_INFO_V21
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;
+  UCHAR              ucMiscInfo;
+  UCHAR              uc480i;
+  UCHAR              uc480p;
+  UCHAR              uc720p;
+  UCHAR              uc1080i;
+  UCHAR              ucReserved;
+  UCHAR              ucLetterBoxMode;
+  UCHAR              ucNumOfWbGpioBlocks; //For Component video D-Connector support. If zere, NTSC type connector
+  ATOM_GPIO_INFO     aWbGpioStateBlock[MAX_SUPPORTED_CV_STANDARDS];
+  ATOM_DTD_FORMAT    aModeTimings[MAX_SUPPORTED_CV_STANDARDS];
+}ATOM_COMPONENT_VIDEO_INFO_V21;
 
 #define ATOM_COMPONENT_VIDEO_INFO_LAST  ATOM_COMPONENT_VIDEO_INFO_V21
 
-/****************************************************************************/
-/*  Structure used in object_InfoTable */
-/****************************************************************************/
-typedef struct _ATOM_OBJECT_HEADER {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usDeviceSupport;
-	USHORT usConnectorObjectTableOffset;
-	USHORT usRouterObjectTableOffset;
-	USHORT usEncoderObjectTableOffset;
-	USHORT usProtectionObjectTableOffset;	/* only available when Protection block is independent. */
-	USHORT usDisplayPathTableOffset;
-} ATOM_OBJECT_HEADER;
+/****************************************************************************/	
+// Structure used in object_InfoTable
+/****************************************************************************/	
+typedef struct _ATOM_OBJECT_HEADER
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  USHORT                    usDeviceSupport;
+  USHORT                    usConnectorObjectTableOffset;
+  USHORT                    usRouterObjectTableOffset;
+  USHORT                    usEncoderObjectTableOffset;
+  USHORT                    usProtectionObjectTableOffset; //only available when Protection block is independent.
+  USHORT                    usDisplayPathTableOffset;
+}ATOM_OBJECT_HEADER;
 
-typedef struct _ATOM_DISPLAY_OBJECT_PATH {
-	USHORT usDeviceTag;	/* supported device */
-	USHORT usSize;		/* the size of ATOM_DISPLAY_OBJECT_PATH */
-	USHORT usConnObjectId;	/* Connector Object ID */
-	USHORT usGPUObjectId;	/* GPU ID */
-	USHORT usGraphicObjIds[1];	/* 1st Encoder Obj source from GPU to last Graphic Obj destinate to connector. */
-} ATOM_DISPLAY_OBJECT_PATH;
+typedef struct _ATOM_OBJECT_HEADER_V3
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  USHORT                    usDeviceSupport;
+  USHORT                    usConnectorObjectTableOffset;
+  USHORT                    usRouterObjectTableOffset;
+  USHORT                    usEncoderObjectTableOffset;
+  USHORT                    usProtectionObjectTableOffset; //only available when Protection block is independent.
+  USHORT                    usDisplayPathTableOffset;
+  USHORT                    usMiscObjectTableOffset;
+}ATOM_OBJECT_HEADER_V3;
 
-typedef struct _ATOM_DISPLAY_OBJECT_PATH_TABLE {
-	UCHAR ucNumOfDispPath;
-	UCHAR ucVersion;
-	UCHAR ucPadding[2];
-	ATOM_DISPLAY_OBJECT_PATH asDispPath[1];
-} ATOM_DISPLAY_OBJECT_PATH_TABLE;
-
-typedef struct _ATOM_OBJECT	/* each object has this structure */
+typedef struct  _ATOM_DISPLAY_OBJECT_PATH
 {
-	USHORT usObjectID;
-	USHORT usSrcDstTableOffset;
-	USHORT usRecordOffset;	/* this pointing to a bunch of records defined below */
-	USHORT usReserved;
-} ATOM_OBJECT;
+  USHORT    usDeviceTag;                                   //supported device 
+  USHORT    usSize;                                        //the size of ATOM_DISPLAY_OBJECT_PATH
+  USHORT    usConnObjectId;                                //Connector Object ID 
+  USHORT    usGPUObjectId;                                 //GPU ID 
+  USHORT    usGraphicObjIds[1];                             //1st Encoder Obj source from GPU to last Graphic Obj destinate to connector.
+}ATOM_DISPLAY_OBJECT_PATH;
 
-typedef struct _ATOM_OBJECT_TABLE	/* Above 4 object table offset pointing to a bunch of objects all have this structure */
+typedef struct _ATOM_DISPLAY_OBJECT_PATH_TABLE
 {
-	UCHAR ucNumberOfObjects;
-	UCHAR ucPadding[3];
-	ATOM_OBJECT asObjects[1];
-} ATOM_OBJECT_TABLE;
+  UCHAR                           ucNumOfDispPath;
+  UCHAR                           ucVersion;
+  UCHAR                           ucPadding[2];
+  ATOM_DISPLAY_OBJECT_PATH        asDispPath[1];
+}ATOM_DISPLAY_OBJECT_PATH_TABLE;
 
-typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT	/* usSrcDstTableOffset pointing to this structure */
+
+typedef struct _ATOM_OBJECT                                //each object has this structure    
 {
-	UCHAR ucNumberOfSrc;
-	USHORT usSrcObjectID[1];
-	UCHAR ucNumberOfDst;
-	USHORT usDstObjectID[1];
-} ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT;
+  USHORT              usObjectID;
+  USHORT              usSrcDstTableOffset;
+  USHORT              usRecordOffset;                     //this pointing to a bunch of records defined below
+  USHORT              usReserved;
+}ATOM_OBJECT;
 
-/* Related definitions, all records are differnt but they have a commond header */
-typedef struct _ATOM_COMMON_RECORD_HEADER {
-	UCHAR ucRecordType;	/* An emun to indicate the record type */
-	UCHAR ucRecordSize;	/* The size of the whole record in byte */
-} ATOM_COMMON_RECORD_HEADER;
+typedef struct _ATOM_OBJECT_TABLE                         //Above 4 object table offset pointing to a bunch of objects all have this structure     
+{
+  UCHAR               ucNumberOfObjects;
+  UCHAR               ucPadding[3];
+  ATOM_OBJECT         asObjects[1];
+}ATOM_OBJECT_TABLE;
 
-#define ATOM_I2C_RECORD_TYPE                           1
+typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT         //usSrcDstTableOffset pointing to this structure
+{
+  UCHAR               ucNumberOfSrc;
+  USHORT              usSrcObjectID[1];
+  UCHAR               ucNumberOfDst;
+  USHORT              usDstObjectID[1];
+}ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT;
+
+
+//Two definitions below are for OPM on MXM module designs
+
+#define EXT_HPDPIN_LUTINDEX_0                   0
+#define EXT_HPDPIN_LUTINDEX_1                   1
+#define EXT_HPDPIN_LUTINDEX_2                   2
+#define EXT_HPDPIN_LUTINDEX_3                   3
+#define EXT_HPDPIN_LUTINDEX_4                   4
+#define EXT_HPDPIN_LUTINDEX_5                   5
+#define EXT_HPDPIN_LUTINDEX_6                   6
+#define EXT_HPDPIN_LUTINDEX_7                   7
+#define MAX_NUMBER_OF_EXT_HPDPIN_LUT_ENTRIES   (EXT_HPDPIN_LUTINDEX_7+1)
+
+#define EXT_AUXDDC_LUTINDEX_0                   0
+#define EXT_AUXDDC_LUTINDEX_1                   1
+#define EXT_AUXDDC_LUTINDEX_2                   2
+#define EXT_AUXDDC_LUTINDEX_3                   3
+#define EXT_AUXDDC_LUTINDEX_4                   4
+#define EXT_AUXDDC_LUTINDEX_5                   5
+#define EXT_AUXDDC_LUTINDEX_6                   6
+#define EXT_AUXDDC_LUTINDEX_7                   7
+#define MAX_NUMBER_OF_EXT_AUXDDC_LUT_ENTRIES   (EXT_AUXDDC_LUTINDEX_7+1)
+
+typedef struct _EXT_DISPLAY_PATH
+{
+  USHORT  usDeviceTag;                    //A bit vector to show what devices are supported 
+  USHORT  usDeviceACPIEnum;               //16bit device ACPI id. 
+  USHORT  usDeviceConnector;              //A physical connector for displays to plug in, using object connector definitions
+  UCHAR   ucExtAUXDDCLutIndex;            //An index into external AUX/DDC channel LUT
+  UCHAR   ucExtHPDPINLutIndex;            //An index into external HPD pin LUT
+  USHORT  usExtEncoderObjId;              //external encoder object id
+  USHORT  usReserved[3]; 
+}EXT_DISPLAY_PATH;
+   
+#define NUMBER_OF_UCHAR_FOR_GUID          16
+#define MAX_NUMBER_OF_EXT_DISPLAY_PATH    7
+
+typedef  struct _ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;
+  UCHAR                    ucGuid [NUMBER_OF_UCHAR_FOR_GUID];     // a GUID is a 16 byte long string
+  EXT_DISPLAY_PATH         sPath[MAX_NUMBER_OF_EXT_DISPLAY_PATH]; // total of fixed 7 entries.
+  UCHAR                    ucChecksum;                            // a  simple Checksum of the sum of whole structure equal to 0x0. 
+  UCHAR                    Reserved [7];                          // for potential expansion
+}ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO;
+
+//Related definitions, all records are differnt but they have a commond header
+typedef struct _ATOM_COMMON_RECORD_HEADER
+{
+  UCHAR               ucRecordType;                      //An emun to indicate the record type
+  UCHAR               ucRecordSize;                      //The size of the whole record in byte
+}ATOM_COMMON_RECORD_HEADER;
+
+
+#define ATOM_I2C_RECORD_TYPE                           1         
 #define ATOM_HPD_INT_RECORD_TYPE                       2
 #define ATOM_OUTPUT_PROTECTION_RECORD_TYPE             3
 #define ATOM_CONNECTOR_DEVICE_TAG_RECORD_TYPE          4
-#define	ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD_TYPE	     5	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
-#define ATOM_ENCODER_FPGA_CONTROL_RECORD_TYPE          6	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
+#define	ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD_TYPE	     5 //Obsolete, switch to use GPIO_CNTL_RECORD_TYPE
+#define ATOM_ENCODER_FPGA_CONTROL_RECORD_TYPE          6 //Obsolete, switch to use GPIO_CNTL_RECORD_TYPE
 #define ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD_TYPE      7
-#define ATOM_JTAG_RECORD_TYPE                          8	/* Obsolete, switch to use GPIO_CNTL_RECORD_TYPE */
+#define ATOM_JTAG_RECORD_TYPE                          8 //Obsolete, switch to use GPIO_CNTL_RECORD_TYPE
 #define ATOM_OBJECT_GPIO_CNTL_RECORD_TYPE              9
 #define ATOM_ENCODER_DVO_CF_RECORD_TYPE               10
 #define ATOM_CONNECTOR_CF_RECORD_TYPE                 11
 #define	ATOM_CONNECTOR_HARDCODE_DTD_RECORD_TYPE	      12
 #define ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD_TYPE  13
-#define ATOM_ROUTER_DDC_PATH_SELECT_RECORD_TYPE				14
-#define ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD_TYPE					15
+#define ATOM_ROUTER_DDC_PATH_SELECT_RECORD_TYPE	      14
+#define ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD_TYPE	15
+#define ATOM_CONNECTOR_HPDPIN_LUT_RECORD_TYPE          16 //This is for the case when connectors are not known to object table
+#define ATOM_CONNECTOR_AUXDDC_LUT_RECORD_TYPE          17 //This is for the case when connectors are not known to object table
+#define ATOM_OBJECT_LINK_RECORD_TYPE                   18 //Once this record is present under one object, it indicats the oobject is linked to another obj described by the record
+#define ATOM_CONNECTOR_REMOTE_CAP_RECORD_TYPE          19
 
-/* Must be updated when new record type is added,equal to that record definition! */
-#define ATOM_MAX_OBJECT_RECORD_NUMBER             ATOM_CONNECTOR_CF_RECORD_TYPE
 
-typedef struct _ATOM_I2C_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	ATOM_I2C_ID_CONFIG sucI2cId;
-	UCHAR ucI2CAddr;	/* The slave address, it's 0 when the record is attached to connector for DDC */
-} ATOM_I2C_RECORD;
+//Must be updated when new record type is added,equal to that record definition!
+#define ATOM_MAX_OBJECT_RECORD_NUMBER             ATOM_CONNECTOR_REMOTE_CAP_RECORD_TYPE
 
-typedef struct _ATOM_HPD_INT_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucHPDIntGPIOID;	/* Corresponding block in GPIO_PIN_INFO table gives the pin info */
-	UCHAR ucPlugged_PinState;
-} ATOM_HPD_INT_RECORD;
+typedef struct  _ATOM_I2C_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  ATOM_I2C_ID_CONFIG          sucI2cId; 
+  UCHAR                       ucI2CAddr;              //The slave address, it's 0 when the record is attached to connector for DDC
+}ATOM_I2C_RECORD;
 
-typedef struct _ATOM_OUTPUT_PROTECTION_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucProtectionFlag;
-	UCHAR ucReserved;
-} ATOM_OUTPUT_PROTECTION_RECORD;
+typedef struct  _ATOM_HPD_INT_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucHPDIntGPIOID;         //Corresponding block in GPIO_PIN_INFO table gives the pin info           
+  UCHAR                       ucPlugged_PinState;
+}ATOM_HPD_INT_RECORD;
 
-typedef struct _ATOM_CONNECTOR_DEVICE_TAG {
-	ULONG ulACPIDeviceEnum;	/* Reserved for now */
-	USHORT usDeviceID;	/* This Id is same as "ATOM_DEVICE_XXX_SUPPORT" */
-	USHORT usPadding;
-} ATOM_CONNECTOR_DEVICE_TAG;
 
-typedef struct _ATOM_CONNECTOR_DEVICE_TAG_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucNumberOfDevice;
-	UCHAR ucReserved;
-	ATOM_CONNECTOR_DEVICE_TAG asDeviceTag[1];	/* This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation */
-} ATOM_CONNECTOR_DEVICE_TAG_RECORD;
+typedef struct  _ATOM_OUTPUT_PROTECTION_RECORD 
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucProtectionFlag;
+  UCHAR                       ucReserved;
+}ATOM_OUTPUT_PROTECTION_RECORD;
 
-typedef struct _ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucConfigGPIOID;
-	UCHAR ucConfigGPIOState;	/* Set to 1 when it's active high to enable external flow in */
-	UCHAR ucFlowinGPIPID;
-	UCHAR ucExtInGPIPID;
-} ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD;
+typedef struct  _ATOM_CONNECTOR_DEVICE_TAG
+{
+  ULONG                       ulACPIDeviceEnum;       //Reserved for now
+  USHORT                      usDeviceID;             //This Id is same as "ATOM_DEVICE_XXX_SUPPORT"
+  USHORT                      usPadding;
+}ATOM_CONNECTOR_DEVICE_TAG;
 
-typedef struct _ATOM_ENCODER_FPGA_CONTROL_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucCTL1GPIO_ID;
-	UCHAR ucCTL1GPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucCTL2GPIO_ID;
-	UCHAR ucCTL2GPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucCTL3GPIO_ID;
-	UCHAR ucCTL3GPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucCTLFPGA_IN_ID;
-	UCHAR ucPadding[3];
-} ATOM_ENCODER_FPGA_CONTROL_RECORD;
+typedef struct  _ATOM_CONNECTOR_DEVICE_TAG_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucNumberOfDevice;
+  UCHAR                       ucReserved;
+  ATOM_CONNECTOR_DEVICE_TAG   asDeviceTag[1];         //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation
+}ATOM_CONNECTOR_DEVICE_TAG_RECORD;
 
-typedef struct _ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucGPIOID;		/* Corresponding block in GPIO_PIN_INFO table gives the pin info */
-	UCHAR ucTVActiveState;	/* Indicating when the pin==0 or 1 when TV is connected */
-} ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD;
 
-typedef struct _ATOM_JTAG_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucTMSGPIO_ID;
-	UCHAR ucTMSGPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucTCKGPIO_ID;
-	UCHAR ucTCKGPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucTDOGPIO_ID;
-	UCHAR ucTDOGPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucTDIGPIO_ID;
-	UCHAR ucTDIGPIOState;	/* Set to 1 when it's active high */
-	UCHAR ucPadding[2];
-} ATOM_JTAG_RECORD;
+typedef struct  _ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR						            ucConfigGPIOID;
+  UCHAR						            ucConfigGPIOState;	    //Set to 1 when it's active high to enable external flow in
+  UCHAR                       ucFlowinGPIPID;
+  UCHAR                       ucExtInGPIPID;
+}ATOM_CONNECTOR_DVI_EXT_INPUT_RECORD;
 
-/* The following generic object gpio pin control record type will replace JTAG_RECORD/FPGA_CONTROL_RECORD/DVI_EXT_INPUT_RECORD above gradually */
-typedef struct _ATOM_GPIO_PIN_CONTROL_PAIR {
-	UCHAR ucGPIOID;		/*  GPIO_ID, find the corresponding ID in GPIO_LUT table */
-	UCHAR ucGPIO_PinState;	/*  Pin state showing how to set-up the pin */
-} ATOM_GPIO_PIN_CONTROL_PAIR;
+typedef struct  _ATOM_ENCODER_FPGA_CONTROL_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucCTL1GPIO_ID;
+  UCHAR                       ucCTL1GPIOState;        //Set to 1 when it's active high
+  UCHAR                       ucCTL2GPIO_ID;
+  UCHAR                       ucCTL2GPIOState;        //Set to 1 when it's active high
+  UCHAR                       ucCTL3GPIO_ID;
+  UCHAR                       ucCTL3GPIOState;        //Set to 1 when it's active high
+  UCHAR                       ucCTLFPGA_IN_ID;
+  UCHAR                       ucPadding[3];
+}ATOM_ENCODER_FPGA_CONTROL_RECORD;
 
-typedef struct _ATOM_OBJECT_GPIO_CNTL_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucFlags;		/*  Future expnadibility */
-	UCHAR ucNumberOfPins;	/*  Number of GPIO pins used to control the object */
-	ATOM_GPIO_PIN_CONTROL_PAIR asGpio[1];	/*  the real gpio pin pair determined by number of pins ucNumberOfPins */
-} ATOM_OBJECT_GPIO_CNTL_RECORD;
+typedef struct  _ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucGPIOID;               //Corresponding block in GPIO_PIN_INFO table gives the pin info 
+  UCHAR                       ucTVActiveState;        //Indicating when the pin==0 or 1 when TV is connected
+}ATOM_CONNECTOR_CVTV_SHARE_DIN_RECORD;
 
-/* Definitions for GPIO pin state */
+typedef struct  _ATOM_JTAG_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucTMSGPIO_ID;
+  UCHAR                       ucTMSGPIOState;         //Set to 1 when it's active high
+  UCHAR                       ucTCKGPIO_ID;
+  UCHAR                       ucTCKGPIOState;         //Set to 1 when it's active high
+  UCHAR                       ucTDOGPIO_ID;
+  UCHAR                       ucTDOGPIOState;         //Set to 1 when it's active high
+  UCHAR                       ucTDIGPIO_ID;
+  UCHAR                       ucTDIGPIOState;         //Set to 1 when it's active high
+  UCHAR                       ucPadding[2];
+}ATOM_JTAG_RECORD;
+
+
+//The following generic object gpio pin control record type will replace JTAG_RECORD/FPGA_CONTROL_RECORD/DVI_EXT_INPUT_RECORD above gradually
+typedef struct _ATOM_GPIO_PIN_CONTROL_PAIR
+{
+  UCHAR                       ucGPIOID;               // GPIO_ID, find the corresponding ID in GPIO_LUT table
+  UCHAR                       ucGPIO_PinState;        // Pin state showing how to set-up the pin
+}ATOM_GPIO_PIN_CONTROL_PAIR;
+
+typedef struct  _ATOM_OBJECT_GPIO_CNTL_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucFlags;                // Future expnadibility
+  UCHAR                       ucNumberOfPins;         // Number of GPIO pins used to control the object
+  ATOM_GPIO_PIN_CONTROL_PAIR  asGpio[1];              // the real gpio pin pair determined by number of pins ucNumberOfPins
+}ATOM_OBJECT_GPIO_CNTL_RECORD;
+
+//Definitions for GPIO pin state 
 #define GPIO_PIN_TYPE_INPUT             0x00
 #define GPIO_PIN_TYPE_OUTPUT            0x10
 #define GPIO_PIN_TYPE_HW_CONTROL        0x20
 
-/* For GPIO_PIN_TYPE_OUTPUT the following is defined */
+//For GPIO_PIN_TYPE_OUTPUT the following is defined 
 #define GPIO_PIN_OUTPUT_STATE_MASK      0x01
 #define GPIO_PIN_OUTPUT_STATE_SHIFT     0
 #define GPIO_PIN_STATE_ACTIVE_LOW       0x0
 #define GPIO_PIN_STATE_ACTIVE_HIGH      0x1
 
-typedef struct _ATOM_ENCODER_DVO_CF_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	ULONG ulStrengthControl;	/*  DVOA strength control for CF */
-	UCHAR ucPadding[2];
-} ATOM_ENCODER_DVO_CF_RECORD;
+// Indexes to GPIO array in GLSync record 
+#define ATOM_GPIO_INDEX_GLSYNC_REFCLK    0
+#define ATOM_GPIO_INDEX_GLSYNC_HSYNC     1
+#define ATOM_GPIO_INDEX_GLSYNC_VSYNC     2
+#define ATOM_GPIO_INDEX_GLSYNC_SWAP_REQ  3
+#define ATOM_GPIO_INDEX_GLSYNC_SWAP_GNT  4
+#define ATOM_GPIO_INDEX_GLSYNC_INTERRUPT 5
+#define ATOM_GPIO_INDEX_GLSYNC_V_RESET   6
+#define ATOM_GPIO_INDEX_GLSYNC_MAX       7
 
-/*  value for ATOM_CONNECTOR_CF_RECORD.ucConnectedDvoBundle */
+typedef struct  _ATOM_ENCODER_DVO_CF_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  ULONG                       ulStrengthControl;      // DVOA strength control for CF
+  UCHAR                       ucPadding[2];
+}ATOM_ENCODER_DVO_CF_RECORD;
+
+// value for ATOM_CONNECTOR_CF_RECORD.ucConnectedDvoBundle
 #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_UPPER12BITBUNDLEA   1
 #define ATOM_CONNECTOR_CF_RECORD_CONNECTED_LOWER12BITBUNDLEB   2
 
-typedef struct _ATOM_CONNECTOR_CF_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	USHORT usMaxPixClk;
-	UCHAR ucFlowCntlGpioId;
-	UCHAR ucSwapCntlGpioId;
-	UCHAR ucConnectedDvoBundle;
-	UCHAR ucPadding;
-} ATOM_CONNECTOR_CF_RECORD;
+typedef struct  _ATOM_CONNECTOR_CF_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  USHORT                      usMaxPixClk;
+  UCHAR                       ucFlowCntlGpioId;
+  UCHAR                       ucSwapCntlGpioId;
+  UCHAR                       ucConnectedDvoBundle;
+  UCHAR                       ucPadding;
+}ATOM_CONNECTOR_CF_RECORD;
 
-typedef struct _ATOM_CONNECTOR_HARDCODE_DTD_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	ATOM_DTD_FORMAT asTiming;
-} ATOM_CONNECTOR_HARDCODE_DTD_RECORD;
+typedef struct  _ATOM_CONNECTOR_HARDCODE_DTD_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+	ATOM_DTD_FORMAT							asTiming;
+}ATOM_CONNECTOR_HARDCODE_DTD_RECORD;
 
-typedef struct _ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;	/* ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD_TYPE */
-	UCHAR ucSubConnectorType;	/* CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D|X_ID_DUAL_LINK_DVI_D|HDMI_TYPE_A */
-	UCHAR ucReserved;
-} ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD;
+typedef struct _ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;                //ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD_TYPE
+  UCHAR                       ucSubConnectorType;     //CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_D|X_ID_DUAL_LINK_DVI_D|HDMI_TYPE_A
+  UCHAR                       ucReserved;
+}ATOM_CONNECTOR_PCIE_SUBCONNECTOR_RECORD;
 
-typedef struct _ATOM_ROUTER_DDC_PATH_SELECT_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucMuxType;	/* decide the number of ucMuxState, =0, no pin state, =1: single state with complement, >1: multiple state */
-	UCHAR ucMuxControlPin;
-	UCHAR ucMuxState[2];	/* for alligment purpose */
-} ATOM_ROUTER_DDC_PATH_SELECT_RECORD;
 
-typedef struct _ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD {
-	ATOM_COMMON_RECORD_HEADER sheader;
-	UCHAR ucMuxType;
-	UCHAR ucMuxControlPin;
-	UCHAR ucMuxState[2];	/* for alligment purpose */
-} ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD;
+typedef struct _ATOM_ROUTER_DDC_PATH_SELECT_RECORD
+{
+	ATOM_COMMON_RECORD_HEADER   sheader;                
+	UCHAR												ucMuxType;							//decide the number of ucMuxState, =0, no pin state, =1: single state with complement, >1: multiple state
+	UCHAR												ucMuxControlPin;
+	UCHAR												ucMuxState[2];					//for alligment purpose
+}ATOM_ROUTER_DDC_PATH_SELECT_RECORD;
 
-/*  define ucMuxType */
+typedef struct _ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD
+{
+	ATOM_COMMON_RECORD_HEADER   sheader;                
+	UCHAR												ucMuxType;
+	UCHAR												ucMuxControlPin;
+	UCHAR												ucMuxState[2];					//for alligment purpose
+}ATOM_ROUTER_DATA_CLOCK_PATH_SELECT_RECORD;
+
+// define ucMuxType
 #define ATOM_ROUTER_MUX_PIN_STATE_MASK								0x0f
 #define ATOM_ROUTER_MUX_PIN_SINGLE_STATE_COMPLEMENT		0x01
 
-/****************************************************************************/
-/*  ASIC voltage data table */
-/****************************************************************************/
-typedef struct _ATOM_VOLTAGE_INFO_HEADER {
-	USHORT usVDDCBaseLevel;	/* In number of 50mv unit */
-	USHORT usReserved;	/* For possible extension table offset */
-	UCHAR ucNumOfVoltageEntries;
-	UCHAR ucBytesPerVoltageEntry;
-	UCHAR ucVoltageStep;	/* Indicating in how many mv increament is one step, 0.5mv unit */
-	UCHAR ucDefaultVoltageEntry;
-	UCHAR ucVoltageControlI2cLine;
-	UCHAR ucVoltageControlAddress;
-	UCHAR ucVoltageControlOffset;
-} ATOM_VOLTAGE_INFO_HEADER;
+typedef struct _ATOM_CONNECTOR_HPDPIN_LUT_RECORD     //record for ATOM_CONNECTOR_HPDPIN_LUT_RECORD_TYPE
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  UCHAR                       ucHPDPINMap[MAX_NUMBER_OF_EXT_HPDPIN_LUT_ENTRIES];  //An fixed size array which maps external pins to internal GPIO_PIN_INFO table 
+}ATOM_CONNECTOR_HPDPIN_LUT_RECORD;
 
-typedef struct _ATOM_VOLTAGE_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_VOLTAGE_INFO_HEADER viHeader;
-	UCHAR ucVoltageEntries[64];	/* 64 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries*ucBytesPerVoltageEntry */
-} ATOM_VOLTAGE_INFO;
+typedef struct _ATOM_CONNECTOR_AUXDDC_LUT_RECORD  //record for ATOM_CONNECTOR_AUXDDC_LUT_RECORD_TYPE
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  ATOM_I2C_ID_CONFIG          ucAUXDDCMap[MAX_NUMBER_OF_EXT_AUXDDC_LUT_ENTRIES];  //An fixed size array which maps external pins to internal DDC ID
+}ATOM_CONNECTOR_AUXDDC_LUT_RECORD;
 
-typedef struct _ATOM_VOLTAGE_FORMULA {
-	USHORT usVoltageBaseLevel;	/*  In number of 1mv unit */
-	USHORT usVoltageStep;	/*  Indicating in how many mv increament is one step, 1mv unit */
-	UCHAR ucNumOfVoltageEntries;	/*  Number of Voltage Entry, which indicate max Voltage */
-	UCHAR ucFlag;		/*  bit0=0 :step is 1mv =1 0.5mv */
-	UCHAR ucBaseVID;	/*  if there is no lookup table, VID= BaseVID + ( Vol - BaseLevle ) /VoltageStep */
-	UCHAR ucReserved;
-	UCHAR ucVIDAdjustEntries[32];	/*  32 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries */
-} ATOM_VOLTAGE_FORMULA;
+typedef struct _ATOM_OBJECT_LINK_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  USHORT                      usObjectID;         //could be connector, encorder or other object in object.h
+}ATOM_OBJECT_LINK_RECORD;
 
-typedef struct _ATOM_VOLTAGE_CONTROL {
-	UCHAR ucVoltageControlId;	/* Indicate it is controlled by I2C or GPIO or HW state machine */
-	UCHAR ucVoltageControlI2cLine;
-	UCHAR ucVoltageControlAddress;
-	UCHAR ucVoltageControlOffset;
-	USHORT usGpioPin_AIndex;	/* GPIO_PAD register index */
-	UCHAR ucGpioPinBitShift[9];	/* at most 8 pin support 255 VIDs, termintate with 0xff */
-	UCHAR ucReserved;
-} ATOM_VOLTAGE_CONTROL;
+typedef struct _ATOM_CONNECTOR_REMOTE_CAP_RECORD
+{
+  ATOM_COMMON_RECORD_HEADER   sheader;
+  USHORT                      usReserved;
+}ATOM_CONNECTOR_REMOTE_CAP_RECORD;
 
-/*  Define ucVoltageControlId */
+/****************************************************************************/	
+// ASIC voltage data table
+/****************************************************************************/	
+typedef struct  _ATOM_VOLTAGE_INFO_HEADER
+{
+   USHORT   usVDDCBaseLevel;                //In number of 50mv unit
+   USHORT   usReserved;                     //For possible extension table offset
+   UCHAR    ucNumOfVoltageEntries;
+   UCHAR    ucBytesPerVoltageEntry;
+   UCHAR    ucVoltageStep;                  //Indicating in how many mv increament is one step, 0.5mv unit
+   UCHAR    ucDefaultVoltageEntry;
+   UCHAR    ucVoltageControlI2cLine;
+   UCHAR    ucVoltageControlAddress;
+   UCHAR    ucVoltageControlOffset;
+}ATOM_VOLTAGE_INFO_HEADER;
+
+typedef struct  _ATOM_VOLTAGE_INFO
+{
+   ATOM_COMMON_TABLE_HEADER	sHeader; 
+   ATOM_VOLTAGE_INFO_HEADER viHeader;
+   UCHAR    ucVoltageEntries[64];            //64 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries*ucBytesPerVoltageEntry
+}ATOM_VOLTAGE_INFO;
+
+
+typedef struct  _ATOM_VOLTAGE_FORMULA
+{
+   USHORT   usVoltageBaseLevel;             // In number of 1mv unit
+   USHORT   usVoltageStep;                  // Indicating in how many mv increament is one step, 1mv unit
+	 UCHAR		ucNumOfVoltageEntries;					// Number of Voltage Entry, which indicate max Voltage
+	 UCHAR		ucFlag;													// bit0=0 :step is 1mv =1 0.5mv
+	 UCHAR		ucBaseVID;											// if there is no lookup table, VID= BaseVID + ( Vol - BaseLevle ) /VoltageStep
+	 UCHAR		ucReserved;
+	 UCHAR		ucVIDAdjustEntries[32];					// 32 is for allocation, the actual number of entry is present at ucNumOfVoltageEntries
+}ATOM_VOLTAGE_FORMULA;
+
+typedef struct  _VOLTAGE_LUT_ENTRY
+{
+	 USHORT		usVoltageCode;									// The Voltage ID, either GPIO or I2C code
+	 USHORT		usVoltageValue;									// The corresponding Voltage Value, in mV
+}VOLTAGE_LUT_ENTRY;
+
+typedef struct  _ATOM_VOLTAGE_FORMULA_V2
+{
+	 UCHAR		ucNumOfVoltageEntries;					// Number of Voltage Entry, which indicate max Voltage
+	 UCHAR		ucReserved[3];
+	 VOLTAGE_LUT_ENTRY asVIDAdjustEntries[32];// 32 is for allocation, the actual number of entries is in ucNumOfVoltageEntries
+}ATOM_VOLTAGE_FORMULA_V2;
+
+typedef struct _ATOM_VOLTAGE_CONTROL
+{
+	UCHAR		 ucVoltageControlId;							//Indicate it is controlled by I2C or GPIO or HW state machine		
+  UCHAR    ucVoltageControlI2cLine;
+  UCHAR    ucVoltageControlAddress;
+  UCHAR    ucVoltageControlOffset;	 	
+  USHORT   usGpioPin_AIndex;								//GPIO_PAD register index
+  UCHAR    ucGpioPinBitShift[9];						//at most 8 pin support 255 VIDs, termintate with 0xff
+	UCHAR		 ucReserved;
+}ATOM_VOLTAGE_CONTROL;
+
+// Define ucVoltageControlId
 #define	VOLTAGE_CONTROLLED_BY_HW							0x00
 #define	VOLTAGE_CONTROLLED_BY_I2C_MASK				0x7F
 #define	VOLTAGE_CONTROLLED_BY_GPIO						0x80
-#define	VOLTAGE_CONTROL_ID_LM64								0x01	/* I2C control, used for R5xx Core Voltage */
-#define	VOLTAGE_CONTROL_ID_DAC								0x02	/* I2C control, used for R5xx/R6xx MVDDC,MVDDQ or VDDCI */
-#define	VOLTAGE_CONTROL_ID_VT116xM						0x03	/* I2C control, used for R6xx Core Voltage */
-#define VOLTAGE_CONTROL_ID_DS4402							0x04
+#define	VOLTAGE_CONTROL_ID_LM64								0x01									//I2C control, used for R5xx Core Voltage
+#define	VOLTAGE_CONTROL_ID_DAC								0x02									//I2C control, used for R5xx/R6xx MVDDC,MVDDQ or VDDCI
+#define	VOLTAGE_CONTROL_ID_VT116xM						0x03									//I2C control, used for R6xx Core Voltage
+#define VOLTAGE_CONTROL_ID_DS4402							0x04									
 
-typedef struct _ATOM_VOLTAGE_OBJECT {
-	UCHAR ucVoltageType;	/* Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI */
-	UCHAR ucSize;		/* Size of Object */
-	ATOM_VOLTAGE_CONTROL asControl;	/* describ how to control */
-	ATOM_VOLTAGE_FORMULA asFormula;	/* Indicate How to convert real Voltage to VID */
-} ATOM_VOLTAGE_OBJECT;
+typedef struct  _ATOM_VOLTAGE_OBJECT
+{
+ 	 UCHAR		ucVoltageType;									//Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI	 
+	 UCHAR		ucSize;													//Size of Object	
+	 ATOM_VOLTAGE_CONTROL			asControl;			//describ how to control 	 
+ 	 ATOM_VOLTAGE_FORMULA			asFormula;			//Indicate How to convert real Voltage to VID 
+}ATOM_VOLTAGE_OBJECT;
 
-typedef struct _ATOM_VOLTAGE_OBJECT_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_VOLTAGE_OBJECT asVoltageObj[3];	/* Info for Voltage control */
-} ATOM_VOLTAGE_OBJECT_INFO;
+typedef struct  _ATOM_VOLTAGE_OBJECT_V2
+{
+ 	 UCHAR		ucVoltageType;									//Indicate Voltage Source: VDDC, MVDDC, MVDDQ or MVDDCI	 
+	 UCHAR		ucSize;													//Size of Object	
+	 ATOM_VOLTAGE_CONTROL			asControl;			//describ how to control 	 
+ 	 ATOM_VOLTAGE_FORMULA_V2	asFormula;			//Indicate How to convert real Voltage to VID 
+}ATOM_VOLTAGE_OBJECT_V2;
 
-typedef struct _ATOM_LEAKID_VOLTAGE {
-	UCHAR ucLeakageId;
-	UCHAR ucReserved;
-	USHORT usVoltage;
-} ATOM_LEAKID_VOLTAGE;
+typedef struct  _ATOM_VOLTAGE_OBJECT_INFO
+{
+   ATOM_COMMON_TABLE_HEADER	sHeader; 
+	 ATOM_VOLTAGE_OBJECT			asVoltageObj[3];	//Info for Voltage control	  	 
+}ATOM_VOLTAGE_OBJECT_INFO;
 
-typedef struct _ATOM_ASIC_PROFILE_VOLTAGE {
-	UCHAR ucProfileId;
-	UCHAR ucReserved;
-	USHORT usSize;
-	USHORT usEfuseSpareStartAddr;
-	USHORT usFuseIndex[8];	/* from LSB to MSB, Max 8bit,end of 0xffff if less than 8 efuse id, */
-	ATOM_LEAKID_VOLTAGE asLeakVol[2];	/* Leakid and relatd voltage */
-} ATOM_ASIC_PROFILE_VOLTAGE;
+typedef struct  _ATOM_VOLTAGE_OBJECT_INFO_V2
+{
+   ATOM_COMMON_TABLE_HEADER	sHeader; 
+	 ATOM_VOLTAGE_OBJECT_V2			asVoltageObj[3];	//Info for Voltage control	  	 
+}ATOM_VOLTAGE_OBJECT_INFO_V2;
 
-/* ucProfileId */
-#define	ATOM_ASIC_PROFILE_ID_EFUSE_VOLTAGE			1
+typedef struct  _ATOM_LEAKID_VOLTAGE
+{
+	UCHAR		ucLeakageId;
+	UCHAR		ucReserved;
+	USHORT	usVoltage;
+}ATOM_LEAKID_VOLTAGE;
+
+typedef struct  _ATOM_ASIC_PROFILE_VOLTAGE
+{
+	UCHAR		ucProfileId;
+	UCHAR		ucReserved;
+	USHORT	usSize;
+	USHORT	usEfuseSpareStartAddr;
+	USHORT	usFuseIndex[8];												//from LSB to MSB, Max 8bit,end of 0xffff if less than 8 efuse id, 
+	ATOM_LEAKID_VOLTAGE					asLeakVol[2];			//Leakid and relatd voltage
+}ATOM_ASIC_PROFILE_VOLTAGE;
+
+//ucProfileId
+#define	ATOM_ASIC_PROFILE_ID_EFUSE_VOLTAGE			1		
 #define	ATOM_ASIC_PROFILE_ID_EFUSE_PERFORMANCE_VOLTAGE			1
 #define	ATOM_ASIC_PROFILE_ID_EFUSE_THERMAL_VOLTAGE					2
 
-typedef struct _ATOM_ASIC_PROFILING_INFO {
-	ATOM_COMMON_TABLE_HEADER asHeader;
-	ATOM_ASIC_PROFILE_VOLTAGE asVoltage;
-} ATOM_ASIC_PROFILING_INFO;
+typedef struct  _ATOM_ASIC_PROFILING_INFO
+{
+  ATOM_COMMON_TABLE_HEADER			asHeader; 
+	ATOM_ASIC_PROFILE_VOLTAGE			asVoltage;
+}ATOM_ASIC_PROFILING_INFO;
 
-typedef struct _ATOM_POWER_SOURCE_OBJECT {
-	UCHAR ucPwrSrcId;	/*  Power source */
-	UCHAR ucPwrSensorType;	/*  GPIO, I2C or none */
-	UCHAR ucPwrSensId;	/*  if GPIO detect, it is GPIO id,  if I2C detect, it is I2C id */
-	UCHAR ucPwrSensSlaveAddr;	/*  Slave address if I2C detect */
-	UCHAR ucPwrSensRegIndex;	/*  I2C register Index if I2C detect */
-	UCHAR ucPwrSensRegBitMask;	/*  detect which bit is used if I2C detect */
-	UCHAR ucPwrSensActiveState;	/*  high active or low active */
-	UCHAR ucReserve[3];	/*  reserve */
-	USHORT usSensPwr;	/*  in unit of watt */
-} ATOM_POWER_SOURCE_OBJECT;
+typedef struct _ATOM_POWER_SOURCE_OBJECT
+{
+	UCHAR	ucPwrSrcId;													// Power source
+	UCHAR	ucPwrSensorType;										// GPIO, I2C or none
+	UCHAR	ucPwrSensId;											  // if GPIO detect, it is GPIO id,  if I2C detect, it is I2C id
+	UCHAR	ucPwrSensSlaveAddr;									// Slave address if I2C detect
+	UCHAR ucPwrSensRegIndex;									// I2C register Index if I2C detect
+	UCHAR ucPwrSensRegBitMask;								// detect which bit is used if I2C detect
+	UCHAR	ucPwrSensActiveState;								// high active or low active
+	UCHAR	ucReserve[3];												// reserve		
+	USHORT usSensPwr;													// in unit of watt
+}ATOM_POWER_SOURCE_OBJECT;
 
-typedef struct _ATOM_POWER_SOURCE_INFO {
-	ATOM_COMMON_TABLE_HEADER asHeader;
-	UCHAR asPwrbehave[16];
-	ATOM_POWER_SOURCE_OBJECT asPwrObj[1];
-} ATOM_POWER_SOURCE_INFO;
+typedef struct _ATOM_POWER_SOURCE_INFO
+{
+		ATOM_COMMON_TABLE_HEADER		asHeader;
+		UCHAR												asPwrbehave[16];
+		ATOM_POWER_SOURCE_OBJECT		asPwrObj[1];
+}ATOM_POWER_SOURCE_INFO;
 
-/* Define ucPwrSrcId */
+
+//Define ucPwrSrcId
 #define POWERSOURCE_PCIE_ID1						0x00
 #define POWERSOURCE_6PIN_CONNECTOR_ID1	0x01
 #define POWERSOURCE_8PIN_CONNECTOR_ID1	0x02
 #define POWERSOURCE_6PIN_CONNECTOR_ID2	0x04
 #define POWERSOURCE_8PIN_CONNECTOR_ID2	0x08
 
-/* define ucPwrSensorId */
+//define ucPwrSensorId
 #define POWER_SENSOR_ALWAYS							0x00
 #define POWER_SENSOR_GPIO								0x01
 #define POWER_SENSOR_I2C								0x02
 
+typedef struct _ATOM_INTEGRATED_SYSTEM_INFO_V6
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  ULONG  ulBootUpEngineClock;
+  ULONG  ulDentistVCOFreq;          
+  ULONG  ulBootUpUMAClock;          
+  ULONG  ulReserved1[8];            
+  ULONG  ulBootUpReqDisplayVector;
+  ULONG  ulOtherDisplayMisc;
+  ULONG  ulGPUCapInfo;
+  ULONG  ulReserved2[3];            
+  ULONG  ulSystemConfig;            
+  ULONG  ulCPUCapInfo;              
+  USHORT usMaxNBVoltage;  
+  USHORT usMinNBVoltage;  
+  USHORT usBootUpNBVoltage;         
+  USHORT usExtDispConnInfoOffset;  
+  UCHAR  ucHtcTmpLmt;   
+  UCHAR  ucTjOffset;    
+  UCHAR  ucMemoryType;  
+  UCHAR  ucUMAChannelNumber;
+  ULONG  ulCSR_M3_ARB_CNTL_DEFAULT[10];  
+  ULONG  ulCSR_M3_ARB_CNTL_UVD[10]; 
+  ULONG  ulCSR_M3_ARB_CNTL_FS3D[10];
+  ULONG  ulReserved3[42]; 
+  ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO sExtDispConnInfo;   
+}ATOM_INTEGRATED_SYSTEM_INFO_V6;   
+
+/**********************************************************************************************************************
+// ATOM_INTEGRATED_SYSTEM_INFO_V6 Description
+//ulBootUpEngineClock:              VBIOS bootup Engine clock frequency, in 10kHz unit. 
+//ulDentistVCOFreq:                 Dentist VCO clock in 10kHz unit. 
+//ulBootUpUMAClock:                 System memory boot up clock frequency in 10Khz unit. 
+//ulReserved1[8]                    Reserved by now, must be 0x0. 
+//ulBootUpReqDisplayVector	        VBIOS boot up display IDs
+//                                  ATOM_DEVICE_CRT1_SUPPORT                  0x0001
+//                                  ATOM_DEVICE_CRT2_SUPPORT                  0x0010
+//                                  ATOM_DEVICE_DFP1_SUPPORT                  0x0008 
+//                                  ATOM_DEVICE_DFP6_SUPPORT                  0x0040 
+//                                  ATOM_DEVICE_DFP2_SUPPORT                  0x0080       
+//                                  ATOM_DEVICE_DFP3_SUPPORT                  0x0200       
+//                                  ATOM_DEVICE_DFP4_SUPPORT                  0x0400        
+//                                  ATOM_DEVICE_DFP5_SUPPORT                  0x0800
+//                                  ATOM_DEVICE_LCD1_SUPPORT                  0x0002
+//ulOtherDisplayMisc      	        Other display related flags, not defined yet. 
+//ulGPUCapInfo                      TBD
+//ulReserved2[3]                    must be 0x0 for the reserved.
+//ulSystemConfig                    TBD
+//ulCPUCapInfo                      TBD
+//usMaxNBVoltage                    High NB voltage in unit of mv, calculated using current VDDNB (D24F2xDC) and VDDNB offset fuse. 
+//usMinNBVoltage                    Low NB voltage in unit of mv, calculated using current VDDNB (D24F2xDC) and VDDNB offset fuse.
+//usBootUpNBVoltage                 Boot up NB voltage in unit of mv.
+//ucHtcTmpLmt                       Bit [22:16] of D24F3x64 Thermal Control (HTC) Register.
+//ucTjOffset                        Bit [28:22] of D24F3xE4 Thermtrip Status Register,may not be needed.
+//ucMemoryType                      [3:0]=1:DDR1;=2:DDR2;=3:DDR3.[7:4] is reserved.
+//ucUMAChannelNumber      	        System memory channel numbers. 
+//usExtDispConnectionInfoOffset     ATOM_EXTERNAL_DISPLAY_CONNECTION_INFO offset relative to beginning of this table. 
+//ulCSR_M3_ARB_CNTL_DEFAULT[10]     Arrays with values for CSR M3 arbiter for default
+//ulCSR_M3_ARB_CNTL_UVD[10]         Arrays with values for CSR M3 arbiter for UVD playback.
+//ulCSR_M3_ARB_CNTL_FS3D[10]        Arrays with values for CSR M3 arbiter for Full Screen 3D applications.
+**********************************************************************************************************************/
+
 /**************************************************************************/
-/*  This portion is only used when ext thermal chip or engine/memory clock SS chip is populated on a design */
-/* Memory SS Info Table */
-/* Define Memory Clock SS chip ID */
+// This portion is only used when ext thermal chip or engine/memory clock SS chip is populated on a design
+//Memory SS Info Table
+//Define Memory Clock SS chip ID
 #define ICS91719  1
 #define ICS91720  2
 
-/* Define one structure to inform SW a "block of data" writing to external SS chip via I2C protocol */
-typedef struct _ATOM_I2C_DATA_RECORD {
-	UCHAR ucNunberOfBytes;	/* Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop" */
-	UCHAR ucI2CData[1];	/* I2C data in bytes, should be less than 16 bytes usually */
-} ATOM_I2C_DATA_RECORD;
+//Define one structure to inform SW a "block of data" writing to external SS chip via I2C protocol
+typedef struct _ATOM_I2C_DATA_RECORD
+{
+  UCHAR         ucNunberOfBytes;                                              //Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop"
+  UCHAR         ucI2CData[1];                                                 //I2C data in bytes, should be less than 16 bytes usually
+}ATOM_I2C_DATA_RECORD;
 
-/* Define one structure to inform SW how many blocks of data writing to external SS chip via I2C protocol, in addition to other information */
-typedef struct _ATOM_I2C_DEVICE_SETUP_INFO {
-	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;	/* I2C line and HW/SW assisted cap. */
-	UCHAR ucSSChipID;	/* SS chip being used */
-	UCHAR ucSSChipSlaveAddr;	/* Slave Address to set up this SS chip */
-	UCHAR ucNumOfI2CDataRecords;	/* number of data block */
-	ATOM_I2C_DATA_RECORD asI2CData[1];
-} ATOM_I2C_DEVICE_SETUP_INFO;
 
-/* ========================================================================================== */
-typedef struct _ATOM_ASIC_MVDD_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_I2C_DEVICE_SETUP_INFO asI2CSetup[1];
-} ATOM_ASIC_MVDD_INFO;
+//Define one structure to inform SW how many blocks of data writing to external SS chip via I2C protocol, in addition to other information
+typedef struct _ATOM_I2C_DEVICE_SETUP_INFO
+{
+  ATOM_I2C_ID_CONFIG_ACCESS       sucI2cId;               //I2C line and HW/SW assisted cap.
+  UCHAR		                        ucSSChipID;             //SS chip being used
+  UCHAR		                        ucSSChipSlaveAddr;      //Slave Address to set up this SS chip
+  UCHAR                           ucNumOfI2CDataRecords;  //number of data block
+  ATOM_I2C_DATA_RECORD            asI2CData[1];  
+}ATOM_I2C_DEVICE_SETUP_INFO;
 
-/* ========================================================================================== */
+//==========================================================================================
+typedef struct  _ATOM_ASIC_MVDD_INFO
+{
+  ATOM_COMMON_TABLE_HEADER	      sHeader; 
+  ATOM_I2C_DEVICE_SETUP_INFO      asI2CSetup[1];
+}ATOM_ASIC_MVDD_INFO;
+
+//==========================================================================================
 #define ATOM_MCLK_SS_INFO         ATOM_ASIC_MVDD_INFO
 
-/* ========================================================================================== */
+//==========================================================================================
 /**************************************************************************/
 
-typedef struct _ATOM_ASIC_SS_ASSIGNMENT {
-	ULONG ulTargetClockRange;	/* Clock Out frequence (VCO ), in unit of 10Khz */
-	USHORT usSpreadSpectrumPercentage;	/* in unit of 0.01% */
-	USHORT usSpreadRateInKhz;	/* in unit of kHz, modulation freq */
-	UCHAR ucClockIndication;	/* Indicate which clock source needs SS */
-	UCHAR ucSpreadSpectrumMode;	/* Bit1=0 Down Spread,=1 Center Spread. */
-	UCHAR ucReserved[2];
-} ATOM_ASIC_SS_ASSIGNMENT;
+typedef struct _ATOM_ASIC_SS_ASSIGNMENT
+{
+	ULONG								ulTargetClockRange;						//Clock Out frequence (VCO ), in unit of 10Khz
+  USHORT              usSpreadSpectrumPercentage;		//in unit of 0.01%
+	USHORT							usSpreadRateInKhz;						//in unit of kHz, modulation freq
+  UCHAR               ucClockIndication;					  //Indicate which clock source needs SS
+	UCHAR								ucSpreadSpectrumMode;					//Bit1=0 Down Spread,=1 Center Spread.
+	UCHAR								ucReserved[2];
+}ATOM_ASIC_SS_ASSIGNMENT;
 
-/* Define ucSpreadSpectrumType */
+//Define ucClockIndication, SW uses the IDs below to search if the SS is requried/enabled on a clock branch/signal type.
+//SS is not required or enabled if a match is not found.
 #define ASIC_INTERNAL_MEMORY_SS			1
 #define ASIC_INTERNAL_ENGINE_SS			2
-#define ASIC_INTERNAL_UVD_SS				3
+#define ASIC_INTERNAL_UVD_SS        3
+#define ASIC_INTERNAL_SS_ON_TMDS    4
+#define ASIC_INTERNAL_SS_ON_HDMI    5
+#define ASIC_INTERNAL_SS_ON_LVDS    6
+#define ASIC_INTERNAL_SS_ON_DP      7
+#define ASIC_INTERNAL_SS_ON_DCPLL   8
 
-typedef struct _ATOM_ASIC_INTERNAL_SS_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_ASIC_SS_ASSIGNMENT asSpreadSpectrum[4];
-} ATOM_ASIC_INTERNAL_SS_INFO;
+typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V2
+{
+	ULONG								ulTargetClockRange;						//For mem/engine/uvd, Clock Out frequence (VCO ), in unit of 10Khz
+                                                    //For TMDS/HDMI/LVDS, it is pixel clock , for DP, it is link clock ( 27000 or 16200 )
+  USHORT              usSpreadSpectrumPercentage;		//in unit of 0.01%
+	USHORT							usSpreadRateIn10Hz;						//in unit of 10Hz, modulation freq
+  UCHAR               ucClockIndication;					  //Indicate which clock source needs SS
+	UCHAR								ucSpreadSpectrumMode;					//Bit0=0 Down Spread,=1 Center Spread, bit1=0: internal SS bit1=1: external SS
+	UCHAR								ucReserved[2];
+}ATOM_ASIC_SS_ASSIGNMENT_V2;
 
-/* ==============================Scratch Pad Definition Portion=============================== */
+//ucSpreadSpectrumMode
+//#define ATOM_SS_DOWN_SPREAD_MODE_MASK          0x00000000
+//#define ATOM_SS_DOWN_SPREAD_MODE               0x00000000
+//#define ATOM_SS_CENTRE_SPREAD_MODE_MASK        0x00000001
+//#define ATOM_SS_CENTRE_SPREAD_MODE             0x00000001
+//#define ATOM_INTERNAL_SS_MASK                  0x00000000
+//#define ATOM_EXTERNAL_SS_MASK                  0x00000002
+
+typedef struct _ATOM_ASIC_INTERNAL_SS_INFO
+{
+  ATOM_COMMON_TABLE_HEADER	      sHeader; 
+  ATOM_ASIC_SS_ASSIGNMENT		      asSpreadSpectrum[4];
+}ATOM_ASIC_INTERNAL_SS_INFO;
+
+typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V2
+{
+  ATOM_COMMON_TABLE_HEADER	      sHeader; 
+  ATOM_ASIC_SS_ASSIGNMENT_V2		  asSpreadSpectrum[1];      //this is point only. 
+}ATOM_ASIC_INTERNAL_SS_INFO_V2;
+
+typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3
+{
+	ULONG								ulTargetClockRange;						//For mem/engine/uvd, Clock Out frequence (VCO ), in unit of 10Khz
+                                                    //For TMDS/HDMI/LVDS, it is pixel clock , for DP, it is link clock ( 27000 or 16200 )
+  USHORT              usSpreadSpectrumPercentage;		//in unit of 0.01%
+	USHORT							usSpreadRateIn10Hz;						//in unit of 10Hz, modulation freq
+  UCHAR               ucClockIndication;					  //Indicate which clock source needs SS
+	UCHAR								ucSpreadSpectrumMode;					//Bit0=0 Down Spread,=1 Center Spread, bit1=0: internal SS bit1=1: external SS
+	UCHAR								ucReserved[2];
+}ATOM_ASIC_SS_ASSIGNMENT_V3;
+
+typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
+{
+  ATOM_COMMON_TABLE_HEADER	      sHeader; 
+  ATOM_ASIC_SS_ASSIGNMENT_V3		  asSpreadSpectrum[1];      //this is pointer only. 
+}ATOM_ASIC_INTERNAL_SS_INFO_V3;
+
+
+//==============================Scratch Pad Definition Portion===============================
 #define ATOM_DEVICE_CONNECT_INFO_DEF  0
 #define ATOM_ROM_LOCATION_DEF         1
 #define ATOM_TV_STANDARD_DEF          2
@@ -2995,7 +3852,8 @@
 #define ATOM_I2C_CHANNEL_STATUS_DEF   8
 #define ATOM_I2C_CHANNEL_STATUS1_DEF  9
 
-/*  BIOS_0_SCRATCH Definition */
+
+// BIOS_0_SCRATCH Definition 
 #define ATOM_S0_CRT1_MONO               0x00000001L
 #define ATOM_S0_CRT1_COLOR              0x00000002L
 #define ATOM_S0_CRT1_MASK               (ATOM_S0_CRT1_MONO+ATOM_S0_CRT1_COLOR)
@@ -3008,6 +3866,7 @@
 #define ATOM_S0_CV_DIN_A                0x00000020L
 #define ATOM_S0_CV_MASK_A               (ATOM_S0_CV_A+ATOM_S0_CV_DIN_A)
 
+
 #define ATOM_S0_CRT2_MONO               0x00000100L
 #define ATOM_S0_CRT2_COLOR              0x00000200L
 #define ATOM_S0_CRT2_MASK               (ATOM_S0_CRT2_MONO+ATOM_S0_CRT2_COLOR)
@@ -3025,28 +3884,27 @@
 #define ATOM_S0_DFP2                    0x00020000L
 #define ATOM_S0_LCD1                    0x00040000L
 #define ATOM_S0_LCD2                    0x00080000L
-#define ATOM_S0_TV2                     0x00100000L
-#define ATOM_S0_DFP3			0x00200000L
-#define ATOM_S0_DFP4			0x00400000L
-#define ATOM_S0_DFP5			0x00800000L
+#define ATOM_S0_DFP6                    0x00100000L
+#define ATOM_S0_DFP3                    0x00200000L
+#define ATOM_S0_DFP4                    0x00400000L
+#define ATOM_S0_DFP5                    0x00800000L
 
-#define ATOM_S0_DFP_MASK \
-	(ATOM_S0_DFP1 | ATOM_S0_DFP2 | ATOM_S0_DFP3 | ATOM_S0_DFP4 | ATOM_S0_DFP5)
+#define ATOM_S0_DFP_MASK                ATOM_S0_DFP1 | ATOM_S0_DFP2 | ATOM_S0_DFP3 | ATOM_S0_DFP4 | ATOM_S0_DFP5 | ATOM_S0_DFP6
 
-#define ATOM_S0_FAD_REGISTER_BUG        0x02000000L	/*  If set, indicates we are running a PCIE asic with */
-						    /*  the FAD/HDP reg access bug.  Bit is read by DAL */
+#define ATOM_S0_FAD_REGISTER_BUG        0x02000000L // If set, indicates we are running a PCIE asic with 
+                                                    // the FAD/HDP reg access bug.  Bit is read by DAL, this is obsolete from RV5xx
 
 #define ATOM_S0_THERMAL_STATE_MASK      0x1C000000L
 #define ATOM_S0_THERMAL_STATE_SHIFT     26
 
 #define ATOM_S0_SYSTEM_POWER_STATE_MASK 0xE0000000L
-#define ATOM_S0_SYSTEM_POWER_STATE_SHIFT 29
+#define ATOM_S0_SYSTEM_POWER_STATE_SHIFT 29 
 
 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_AC     1
 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_DC     2
 #define ATOM_S0_SYSTEM_POWER_STATE_VALUE_LITEAC 3
 
-/* Byte aligned definition for BIOS usage */
+//Byte aligned defintion for BIOS usage
 #define ATOM_S0_CRT1_MONOb0             0x01
 #define ATOM_S0_CRT1_COLORb0            0x02
 #define ATOM_S0_CRT1_MASKb0             (ATOM_S0_CRT1_MONOb0+ATOM_S0_CRT1_COLORb0)
@@ -3076,8 +3934,11 @@
 #define ATOM_S0_DFP2b2                  0x02
 #define ATOM_S0_LCD1b2                  0x04
 #define ATOM_S0_LCD2b2                  0x08
-#define ATOM_S0_TV2b2                   0x10
-#define ATOM_S0_DFP3b2									0x20
+#define ATOM_S0_DFP6b2                  0x10
+#define ATOM_S0_DFP3b2                  0x20
+#define ATOM_S0_DFP4b2                  0x40
+#define ATOM_S0_DFP5b2                  0x80
+
 
 #define ATOM_S0_THERMAL_STATE_MASKb3    0x1C
 #define ATOM_S0_THERMAL_STATE_SHIFTb3   2
@@ -3085,43 +3946,20 @@
 #define ATOM_S0_SYSTEM_POWER_STATE_MASKb3 0xE0
 #define ATOM_S0_LCD1_SHIFT              18
 
-/*  BIOS_1_SCRATCH Definition */
+// BIOS_1_SCRATCH Definition
 #define ATOM_S1_ROM_LOCATION_MASK       0x0000FFFFL
 #define ATOM_S1_PCI_BUS_DEV_MASK        0xFFFF0000L
 
-/*       BIOS_2_SCRATCH Definition */
+//	BIOS_2_SCRATCH Definition
 #define ATOM_S2_TV1_STANDARD_MASK       0x0000000FL
 #define ATOM_S2_CURRENT_BL_LEVEL_MASK   0x0000FF00L
 #define ATOM_S2_CURRENT_BL_LEVEL_SHIFT  8
 
-#define ATOM_S2_CRT1_DPMS_STATE         0x00010000L
-#define ATOM_S2_LCD1_DPMS_STATE	        0x00020000L
-#define ATOM_S2_TV1_DPMS_STATE          0x00040000L
-#define ATOM_S2_DFP1_DPMS_STATE         0x00080000L
-#define ATOM_S2_CRT2_DPMS_STATE         0x00100000L
-#define ATOM_S2_LCD2_DPMS_STATE         0x00200000L
-#define ATOM_S2_TV2_DPMS_STATE          0x00400000L
-#define ATOM_S2_DFP2_DPMS_STATE         0x00800000L
-#define ATOM_S2_CV_DPMS_STATE           0x01000000L
-#define ATOM_S2_DFP3_DPMS_STATE					0x02000000L
-#define ATOM_S2_DFP4_DPMS_STATE					0x04000000L
-#define ATOM_S2_DFP5_DPMS_STATE					0x08000000L
-
-#define ATOM_S2_DFP_DPM_STATE \
-	(ATOM_S2_DFP1_DPMS_STATE | ATOM_S2_DFP2_DPMS_STATE | \
-	 ATOM_S2_DFP3_DPMS_STATE | ATOM_S2_DFP4_DPMS_STATE | \
-	 ATOM_S2_DFP5_DPMS_STATE)
-
-#define ATOM_S2_DEVICE_DPMS_STATE \
-	(ATOM_S2_CRT1_DPMS_STATE + ATOM_S2_LCD1_DPMS_STATE + \
-	 ATOM_S2_TV1_DPMS_STATE + ATOM_S2_DFP_DPMS_STATE + \
-	 ATOM_S2_CRT2_DPMS_STATE + ATOM_S2_LCD2_DPMS_STATE + \
-	 ATOM_S2_TV2_DPMS_STATE + ATOM_S2_CV_DPMS_STATE)
-
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASK       0x0C000000L
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASK_SHIFT 26
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_CHANGE     0x10000000L
 
+#define ATOM_S2_DEVICE_DPMS_STATE       0x00010000L
 #define ATOM_S2_VRI_BRIGHT_ENABLE       0x20000000L
 
 #define ATOM_S2_DISPLAY_ROTATION_0_DEGREE     0x0
@@ -3131,21 +3969,11 @@
 #define ATOM_S2_DISPLAY_ROTATION_DEGREE_SHIFT 30
 #define ATOM_S2_DISPLAY_ROTATION_ANGLE_MASK   0xC0000000L
 
-/* Byte aligned definition for BIOS usage */
+
+//Byte aligned defintion for BIOS usage
 #define ATOM_S2_TV1_STANDARD_MASKb0     0x0F
 #define ATOM_S2_CURRENT_BL_LEVEL_MASKb1 0xFF
-#define ATOM_S2_CRT1_DPMS_STATEb2       0x01
-#define ATOM_S2_LCD1_DPMS_STATEb2       0x02
-#define ATOM_S2_TV1_DPMS_STATEb2        0x04
-#define ATOM_S2_DFP1_DPMS_STATEb2       0x08
-#define ATOM_S2_CRT2_DPMS_STATEb2       0x10
-#define ATOM_S2_LCD2_DPMS_STATEb2       0x20
-#define ATOM_S2_TV2_DPMS_STATEb2        0x40
-#define ATOM_S2_DFP2_DPMS_STATEb2       0x80
-#define ATOM_S2_CV_DPMS_STATEb3         0x01
-#define ATOM_S2_DFP3_DPMS_STATEb3				0x02
-#define ATOM_S2_DFP4_DPMS_STATEb3				0x04
-#define ATOM_S2_DFP5_DPMS_STATEb3				0x08
+#define ATOM_S2_DEVICE_DPMS_STATEb2     0x01
 
 #define ATOM_S2_DEVICE_DPMS_MASKw1      0x3FF
 #define ATOM_S2_FORCEDLOWPWRMODE_STATE_MASKb3     0x0C
@@ -3153,21 +3981,22 @@
 #define ATOM_S2_VRI_BRIGHT_ENABLEb3     0x20
 #define ATOM_S2_ROTATION_STATE_MASKb3   0xC0
 
-/*  BIOS_3_SCRATCH Definition */
+
+// BIOS_3_SCRATCH Definition
 #define ATOM_S3_CRT1_ACTIVE             0x00000001L
 #define ATOM_S3_LCD1_ACTIVE             0x00000002L
 #define ATOM_S3_TV1_ACTIVE              0x00000004L
 #define ATOM_S3_DFP1_ACTIVE             0x00000008L
 #define ATOM_S3_CRT2_ACTIVE             0x00000010L
 #define ATOM_S3_LCD2_ACTIVE             0x00000020L
-#define ATOM_S3_TV2_ACTIVE              0x00000040L
+#define ATOM_S3_DFP6_ACTIVE             0x00000040L
 #define ATOM_S3_DFP2_ACTIVE             0x00000080L
 #define ATOM_S3_CV_ACTIVE               0x00000100L
 #define ATOM_S3_DFP3_ACTIVE							0x00000200L
 #define ATOM_S3_DFP4_ACTIVE							0x00000400L
 #define ATOM_S3_DFP5_ACTIVE							0x00000800L
 
-#define ATOM_S3_DEVICE_ACTIVE_MASK      0x000003FFL
+#define ATOM_S3_DEVICE_ACTIVE_MASK      0x00000FFFL
 
 #define ATOM_S3_LCD_FULLEXPANSION_ACTIVE         0x00001000L
 #define ATOM_S3_LCD_EXPANSION_ASPEC_RATIO_ACTIVE 0x00002000L
@@ -3178,7 +4007,7 @@
 #define ATOM_S3_DFP1_CRTC_ACTIVE        0x00080000L
 #define ATOM_S3_CRT2_CRTC_ACTIVE        0x00100000L
 #define ATOM_S3_LCD2_CRTC_ACTIVE        0x00200000L
-#define ATOM_S3_TV2_CRTC_ACTIVE         0x00400000L
+#define ATOM_S3_DFP6_CRTC_ACTIVE        0x00400000L
 #define ATOM_S3_DFP2_CRTC_ACTIVE        0x00800000L
 #define ATOM_S3_CV_CRTC_ACTIVE          0x01000000L
 #define ATOM_S3_DFP3_CRTC_ACTIVE				0x02000000L
@@ -3187,17 +4016,18 @@
 
 #define ATOM_S3_DEVICE_CRTC_ACTIVE_MASK 0x0FFF0000L
 #define ATOM_S3_ASIC_GUI_ENGINE_HUNG    0x20000000L
+//Below two definitions are not supported in pplib, but in the old powerplay in DAL
 #define ATOM_S3_ALLOW_FAST_PWR_SWITCH   0x40000000L
 #define ATOM_S3_RQST_GPU_USE_MIN_PWR    0x80000000L
 
-/* Byte aligned definition for BIOS usage */
+//Byte aligned defintion for BIOS usage
 #define ATOM_S3_CRT1_ACTIVEb0           0x01
 #define ATOM_S3_LCD1_ACTIVEb0           0x02
 #define ATOM_S3_TV1_ACTIVEb0            0x04
 #define ATOM_S3_DFP1_ACTIVEb0           0x08
 #define ATOM_S3_CRT2_ACTIVEb0           0x10
 #define ATOM_S3_LCD2_ACTIVEb0           0x20
-#define ATOM_S3_TV2_ACTIVEb0            0x40
+#define ATOM_S3_DFP6_ACTIVEb0           0x40
 #define ATOM_S3_DFP2_ACTIVEb0           0x80
 #define ATOM_S3_CV_ACTIVEb1             0x01
 #define ATOM_S3_DFP3_ACTIVEb1						0x02
@@ -3212,7 +4042,7 @@
 #define ATOM_S3_DFP1_CRTC_ACTIVEb2      0x08
 #define ATOM_S3_CRT2_CRTC_ACTIVEb2      0x10
 #define ATOM_S3_LCD2_CRTC_ACTIVEb2      0x20
-#define ATOM_S3_TV2_CRTC_ACTIVEb2       0x40
+#define ATOM_S3_DFP6_CRTC_ACTIVEb2      0x40
 #define ATOM_S3_DFP2_CRTC_ACTIVEb2      0x80
 #define ATOM_S3_CV_CRTC_ACTIVEb3        0x01
 #define ATOM_S3_DFP3_CRTC_ACTIVEb3			0x02
@@ -3221,35 +4051,31 @@
 
 #define ATOM_S3_ACTIVE_CRTC2w1          0xFFF
 
-#define ATOM_S3_ASIC_GUI_ENGINE_HUNGb3	0x20
-#define ATOM_S3_ALLOW_FAST_PWR_SWITCHb3 0x40
-#define ATOM_S3_RQST_GPU_USE_MIN_PWRb3  0x80
-
-/*  BIOS_4_SCRATCH Definition */
+// BIOS_4_SCRATCH Definition
 #define ATOM_S4_LCD1_PANEL_ID_MASK      0x000000FFL
 #define ATOM_S4_LCD1_REFRESH_MASK       0x0000FF00L
 #define ATOM_S4_LCD1_REFRESH_SHIFT      8
 
-/* Byte aligned definition for BIOS usage */
+//Byte aligned defintion for BIOS usage
 #define ATOM_S4_LCD1_PANEL_ID_MASKb0	  0x0FF
 #define ATOM_S4_LCD1_REFRESH_MASKb1		  ATOM_S4_LCD1_PANEL_ID_MASKb0
 #define ATOM_S4_VRAM_INFO_MASKb2        ATOM_S4_LCD1_PANEL_ID_MASKb0
 
-/*  BIOS_5_SCRATCH Definition, BIOS_5_SCRATCH is used by Firmware only !!!! */
+// BIOS_5_SCRATCH Definition, BIOS_5_SCRATCH is used by Firmware only !!!!
 #define ATOM_S5_DOS_REQ_CRT1b0          0x01
 #define ATOM_S5_DOS_REQ_LCD1b0          0x02
 #define ATOM_S5_DOS_REQ_TV1b0           0x04
 #define ATOM_S5_DOS_REQ_DFP1b0          0x08
 #define ATOM_S5_DOS_REQ_CRT2b0          0x10
 #define ATOM_S5_DOS_REQ_LCD2b0          0x20
-#define ATOM_S5_DOS_REQ_TV2b0           0x40
+#define ATOM_S5_DOS_REQ_DFP6b0          0x40
 #define ATOM_S5_DOS_REQ_DFP2b0          0x80
 #define ATOM_S5_DOS_REQ_CVb1            0x01
 #define ATOM_S5_DOS_REQ_DFP3b1					0x02
 #define ATOM_S5_DOS_REQ_DFP4b1					0x04
 #define ATOM_S5_DOS_REQ_DFP5b1					0x08
 
-#define ATOM_S5_DOS_REQ_DEVICEw0        0x03FF
+#define ATOM_S5_DOS_REQ_DEVICEw0        0x0FFF
 
 #define ATOM_S5_DOS_REQ_CRT1            0x0001
 #define ATOM_S5_DOS_REQ_LCD1            0x0002
@@ -3257,22 +4083,21 @@
 #define ATOM_S5_DOS_REQ_DFP1            0x0008
 #define ATOM_S5_DOS_REQ_CRT2            0x0010
 #define ATOM_S5_DOS_REQ_LCD2            0x0020
-#define ATOM_S5_DOS_REQ_TV2             0x0040
+#define ATOM_S5_DOS_REQ_DFP6            0x0040
 #define ATOM_S5_DOS_REQ_DFP2            0x0080
 #define ATOM_S5_DOS_REQ_CV              0x0100
-#define ATOM_S5_DOS_REQ_DFP3						0x0200
-#define ATOM_S5_DOS_REQ_DFP4						0x0400
-#define ATOM_S5_DOS_REQ_DFP5						0x0800
+#define ATOM_S5_DOS_REQ_DFP3            0x0200
+#define ATOM_S5_DOS_REQ_DFP4            0x0400
+#define ATOM_S5_DOS_REQ_DFP5            0x0800
 
 #define ATOM_S5_DOS_FORCE_CRT1b2        ATOM_S5_DOS_REQ_CRT1b0
 #define ATOM_S5_DOS_FORCE_TV1b2         ATOM_S5_DOS_REQ_TV1b0
 #define ATOM_S5_DOS_FORCE_CRT2b2        ATOM_S5_DOS_REQ_CRT2b0
 #define ATOM_S5_DOS_FORCE_CVb3          ATOM_S5_DOS_REQ_CVb1
-#define ATOM_S5_DOS_FORCE_DEVICEw1 \
-	(ATOM_S5_DOS_FORCE_CRT1b2 + ATOM_S5_DOS_FORCE_TV1b2 + \
-	 ATOM_S5_DOS_FORCE_CRT2b2 + (ATOM_S5_DOS_FORCE_CVb3 << 8))
+#define ATOM_S5_DOS_FORCE_DEVICEw1      (ATOM_S5_DOS_FORCE_CRT1b2+ATOM_S5_DOS_FORCE_TV1b2+ATOM_S5_DOS_FORCE_CRT2b2+\
+                                        (ATOM_S5_DOS_FORCE_CVb3<<8))
 
-/*  BIOS_6_SCRATCH Definition */
+// BIOS_6_SCRATCH Definition
 #define ATOM_S6_DEVICE_CHANGE           0x00000001L
 #define ATOM_S6_SCALER_CHANGE           0x00000002L
 #define ATOM_S6_LID_CHANGE              0x00000004L
@@ -3285,11 +4110,11 @@
 #define ATOM_S6_HW_I2C_BUSY_STATE       0x00000200L
 #define ATOM_S6_THERMAL_STATE_CHANGE    0x00000400L
 #define ATOM_S6_INTERRUPT_SET_BY_BIOS   0x00000800L
-#define ATOM_S6_REQ_LCD_EXPANSION_FULL         0x00001000L	/* Normal expansion Request bit for LCD */
-#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIO  0x00002000L	/* Aspect ratio expansion Request bit for LCD */
+#define ATOM_S6_REQ_LCD_EXPANSION_FULL         0x00001000L //Normal expansion Request bit for LCD
+#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIO  0x00002000L //Aspect ratio expansion Request bit for LCD
 
-#define ATOM_S6_DISPLAY_STATE_CHANGE    0x00004000L	/* This bit is recycled when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_H_expansion */
-#define ATOM_S6_I2C_STATE_CHANGE        0x00008000L	/* This bit is recycled,when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_V_expansion */
+#define ATOM_S6_DISPLAY_STATE_CHANGE    0x00004000L        //This bit is recycled when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_H_expansion
+#define ATOM_S6_I2C_STATE_CHANGE        0x00008000L        //This bit is recycled,when ATOM_BIOS_INFO_BIOS_SCRATCH6_SCL2_REDEFINE is set,previously it's SCL2_V_expansion
 
 #define ATOM_S6_ACC_REQ_CRT1            0x00010000L
 #define ATOM_S6_ACC_REQ_LCD1            0x00020000L
@@ -3297,7 +4122,7 @@
 #define ATOM_S6_ACC_REQ_DFP1            0x00080000L
 #define ATOM_S6_ACC_REQ_CRT2            0x00100000L
 #define ATOM_S6_ACC_REQ_LCD2            0x00200000L
-#define ATOM_S6_ACC_REQ_TV2             0x00400000L
+#define ATOM_S6_ACC_REQ_DFP6            0x00400000L
 #define ATOM_S6_ACC_REQ_DFP2            0x00800000L
 #define ATOM_S6_ACC_REQ_CV              0x01000000L
 #define ATOM_S6_ACC_REQ_DFP3						0x02000000L
@@ -3310,7 +4135,7 @@
 #define ATOM_S6_VRI_BRIGHTNESS_CHANGE       0x40000000L
 #define ATOM_S6_CONFIG_DISPLAY_CHANGE_MASK  0x80000000L
 
-/* Byte aligned definition for BIOS usage */
+//Byte aligned defintion for BIOS usage
 #define ATOM_S6_DEVICE_CHANGEb0         0x01
 #define ATOM_S6_SCALER_CHANGEb0         0x02
 #define ATOM_S6_LID_CHANGEb0            0x04
@@ -3320,11 +4145,11 @@
 #define ATOM_S6_LID_STATEb0             0x40
 #define ATOM_S6_DOCK_STATEb0            0x80
 #define ATOM_S6_CRITICAL_STATEb1        0x01
-#define ATOM_S6_HW_I2C_BUSY_STATEb1     0x02
+#define ATOM_S6_HW_I2C_BUSY_STATEb1     0x02  
 #define ATOM_S6_THERMAL_STATE_CHANGEb1  0x04
 #define ATOM_S6_INTERRUPT_SET_BY_BIOSb1 0x08
-#define ATOM_S6_REQ_LCD_EXPANSION_FULLb1        0x10
-#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIOb1 0x20
+#define ATOM_S6_REQ_LCD_EXPANSION_FULLb1        0x10    
+#define ATOM_S6_REQ_LCD_EXPANSION_ASPEC_RATIOb1 0x20 
 
 #define ATOM_S6_ACC_REQ_CRT1b2          0x01
 #define ATOM_S6_ACC_REQ_LCD1b2          0x02
@@ -3332,12 +4157,12 @@
 #define ATOM_S6_ACC_REQ_DFP1b2          0x08
 #define ATOM_S6_ACC_REQ_CRT2b2          0x10
 #define ATOM_S6_ACC_REQ_LCD2b2          0x20
-#define ATOM_S6_ACC_REQ_TV2b2           0x40
+#define ATOM_S6_ACC_REQ_DFP6b2          0x40
 #define ATOM_S6_ACC_REQ_DFP2b2          0x80
 #define ATOM_S6_ACC_REQ_CVb3            0x01
-#define ATOM_S6_ACC_REQ_DFP3b3					0x02
-#define ATOM_S6_ACC_REQ_DFP4b3					0x04
-#define ATOM_S6_ACC_REQ_DFP5b3					0x08
+#define ATOM_S6_ACC_REQ_DFP3b3          0x02
+#define ATOM_S6_ACC_REQ_DFP4b3          0x04
+#define ATOM_S6_ACC_REQ_DFP5b3          0x08
 
 #define ATOM_S6_ACC_REQ_DEVICEw1        ATOM_S5_DOS_REQ_DEVICEw0
 #define ATOM_S6_SYSTEM_POWER_MODE_CHANGEb3 0x10
@@ -3366,7 +4191,7 @@
 #define ATOM_S6_VRI_BRIGHTNESS_CHANGE_SHIFT     30
 #define ATOM_S6_CONFIG_DISPLAY_CHANGE_SHIFT     31
 
-/*  BIOS_7_SCRATCH Definition, BIOS_7_SCRATCH is used by Firmware only !!!! */
+// BIOS_7_SCRATCH Definition, BIOS_7_SCRATCH is used by Firmware only !!!!
 #define ATOM_S7_DOS_MODE_TYPEb0             0x03
 #define ATOM_S7_DOS_MODE_VGAb0              0x00
 #define ATOM_S7_DOS_MODE_VESAb0             0x01
@@ -3378,220 +4203,194 @@
 
 #define ATOM_S7_DOS_8BIT_DAC_EN_SHIFT       8
 
-/*  BIOS_8_SCRATCH Definition */
+// BIOS_8_SCRATCH Definition
 #define ATOM_S8_I2C_CHANNEL_BUSY_MASK       0x00000FFFF
-#define ATOM_S8_I2C_HW_ENGINE_BUSY_MASK     0x0FFFF0000
+#define ATOM_S8_I2C_HW_ENGINE_BUSY_MASK     0x0FFFF0000   
 
 #define ATOM_S8_I2C_CHANNEL_BUSY_SHIFT      0
 #define ATOM_S8_I2C_ENGINE_BUSY_SHIFT       16
 
-/*  BIOS_9_SCRATCH Definition */
-#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_MASK
+// BIOS_9_SCRATCH Definition
+#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_MASK 
 #define ATOM_S9_I2C_CHANNEL_COMPLETED_MASK  0x0000FFFF
 #endif
-#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_MASK
+#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_MASK  
 #define ATOM_S9_I2C_CHANNEL_ABORTED_MASK    0xFFFF0000
 #endif
-#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_SHIFT
+#ifndef ATOM_S9_I2C_CHANNEL_COMPLETED_SHIFT 
 #define ATOM_S9_I2C_CHANNEL_COMPLETED_SHIFT 0
 #endif
-#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_SHIFT
+#ifndef ATOM_S9_I2C_CHANNEL_ABORTED_SHIFT   
 #define ATOM_S9_I2C_CHANNEL_ABORTED_SHIFT   16
 #endif
 
+ 
 #define ATOM_FLAG_SET                         0x20
 #define ATOM_FLAG_CLEAR                       0
-#define CLEAR_ATOM_S6_ACC_MODE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_ACC_MODE_SHIFT | ATOM_FLAG_CLEAR)
-#define SET_ATOM_S6_DEVICE_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_DEVICE_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_VRI_BRIGHTNESS_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_VRI_BRIGHTNESS_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_SCALER_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_SCALER_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_LID_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_LID_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_ACC_MODE                ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_ACC_MODE_SHIFT | ATOM_FLAG_CLEAR)
+#define SET_ATOM_S6_DEVICE_CHANGE             ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_DEVICE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_VRI_BRIGHTNESS_CHANGE     ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_VRI_BRIGHTNESS_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_SCALER_CHANGE             ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_SCALER_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_LID_CHANGE                ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_LID_CHANGE_SHIFT | ATOM_FLAG_SET)
 
-#define SET_ATOM_S6_LID_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) |\
-	 ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_SET)
-#define CLEAR_ATOM_S6_LID_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_CLEAR)
+#define SET_ATOM_S6_LID_STATE                 ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_LID_STATE               ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_LID_STATE_SHIFT | ATOM_FLAG_CLEAR)
 
-#define SET_ATOM_S6_DOCK_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8)| \
-	 ATOM_S6_DOCKING_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_DOCK_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_SET)
-#define CLEAR_ATOM_S6_DOCK_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_CLEAR)
+#define SET_ATOM_S6_DOCK_CHANGE			          ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_DOCKING_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_DOCK_STATE                ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_DOCK_STATE              ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_DOCK_STATE_SHIFT | ATOM_FLAG_CLEAR)
 
-#define SET_ATOM_S6_THERMAL_STATE_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_THERMAL_STATE_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_SYSTEM_POWER_MODE_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_SYSTEM_POWER_MODE_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define SET_ATOM_S6_INTERRUPT_SET_BY_BIOS \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_INTERRUPT_SET_BY_BIOS_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_THERMAL_STATE_CHANGE      ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_THERMAL_STATE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_SYSTEM_POWER_MODE_CHANGE  ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_SYSTEM_POWER_MODE_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define SET_ATOM_S6_INTERRUPT_SET_BY_BIOS     ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_INTERRUPT_SET_BY_BIOS_SHIFT | ATOM_FLAG_SET)
 
-#define SET_ATOM_S6_CRITICAL_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_SET)
-#define CLEAR_ATOM_S6_CRITICAL_STATE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_CLEAR)
+#define SET_ATOM_S6_CRITICAL_STATE            ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S6_CRITICAL_STATE          ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_CRITICAL_STATE_SHIFT | ATOM_FLAG_CLEAR)
 
-#define SET_ATOM_S6_REQ_SCALER \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_SET)
-#define CLEAR_ATOM_S6_REQ_SCALER \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_CLEAR )
+#define SET_ATOM_S6_REQ_SCALER                ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_SET)  
+#define CLEAR_ATOM_S6_REQ_SCALER              ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_REQ_SCALER_SHIFT | ATOM_FLAG_CLEAR )
 
-#define SET_ATOM_S6_REQ_SCALER_ARATIO \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_SET )
-#define CLEAR_ATOM_S6_REQ_SCALER_ARATIO \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_CLEAR )
+#define SET_ATOM_S6_REQ_SCALER_ARATIO         ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_SET )
+#define CLEAR_ATOM_S6_REQ_SCALER_ARATIO       ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_REQ_SCALER_ARATIO_SHIFT | ATOM_FLAG_CLEAR )
 
-#define SET_ATOM_S6_I2C_STATE_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_I2C_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
+#define SET_ATOM_S6_I2C_STATE_CHANGE          ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_I2C_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
 
-#define SET_ATOM_S6_DISPLAY_STATE_CHANGE \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_DISPLAY_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
+#define SET_ATOM_S6_DISPLAY_STATE_CHANGE      ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_DISPLAY_STATE_CHANGE_SHIFT | ATOM_FLAG_SET )
 
-#define SET_ATOM_S6_DEVICE_RECONFIG \
-	((ATOM_ACC_CHANGE_INFO_DEF << 8) | \
-	 ATOM_S6_CONFIG_DISPLAY_CHANGE_SHIFT | ATOM_FLAG_SET)
-#define CLEAR_ATOM_S0_LCD1 \
-	((ATOM_DEVICE_CONNECT_INFO_DEF << 8 ) | \
-	 ATOM_S0_LCD1_SHIFT | ATOM_FLAG_CLEAR )
-#define SET_ATOM_S7_DOS_8BIT_DAC_EN \
-	((ATOM_DOS_MODE_INFO_DEF << 8) | \
-	 ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_SET )
-#define CLEAR_ATOM_S7_DOS_8BIT_DAC_EN \
-	((ATOM_DOS_MODE_INFO_DEF << 8) | \
-	 ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_CLEAR )
+#define SET_ATOM_S6_DEVICE_RECONFIG           ((ATOM_ACC_CHANGE_INFO_DEF << 8 )|ATOM_S6_CONFIG_DISPLAY_CHANGE_SHIFT | ATOM_FLAG_SET)
+#define CLEAR_ATOM_S0_LCD1                    ((ATOM_DEVICE_CONNECT_INFO_DEF << 8 )|  ATOM_S0_LCD1_SHIFT | ATOM_FLAG_CLEAR )
+#define SET_ATOM_S7_DOS_8BIT_DAC_EN           ((ATOM_DOS_MODE_INFO_DEF << 8 )|ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_SET )
+#define CLEAR_ATOM_S7_DOS_8BIT_DAC_EN         ((ATOM_DOS_MODE_INFO_DEF << 8 )|ATOM_S7_DOS_8BIT_DAC_EN_SHIFT | ATOM_FLAG_CLEAR )
 
-/****************************************************************************/
-/* Portion II: Definitinos only used in Driver */
+/****************************************************************************/	
+//Portion II: Definitinos only used in Driver
 /****************************************************************************/
 
-/*  Macros used by driver */
+// Macros used by driver
+#ifdef __cplusplus
+#define GetIndexIntoMasterTable(MasterOrData, FieldName) ((reinterpret_cast<char*>(&(static_cast<ATOM_MASTER_LIST_OF_##MasterOrData##_TABLES*>(0))->FieldName)-static_cast<char*>(0))/sizeof(USHORT))
 
-#define	GetIndexIntoMasterTable(MasterOrData, FieldName) (((char *)(&((ATOM_MASTER_LIST_OF_##MasterOrData##_TABLES *)0)->FieldName)-(char *)0)/sizeof(USHORT))
+#define GET_COMMAND_TABLE_COMMANDSET_REVISION(TABLE_HEADER_OFFSET) (((static_cast<ATOM_COMMON_TABLE_HEADER*>(TABLE_HEADER_OFFSET))->ucTableFormatRevision )&0x3F)
+#define GET_COMMAND_TABLE_PARAMETER_REVISION(TABLE_HEADER_OFFSET)  (((static_cast<ATOM_COMMON_TABLE_HEADER*>(TABLE_HEADER_OFFSET))->ucTableContentRevision)&0x3F)
+#else // not __cplusplus
+#define	GetIndexIntoMasterTable(MasterOrData, FieldName) (((char*)(&((ATOM_MASTER_LIST_OF_##MasterOrData##_TABLES*)0)->FieldName)-(char*)0)/sizeof(USHORT))
 
 #define GET_COMMAND_TABLE_COMMANDSET_REVISION(TABLE_HEADER_OFFSET) ((((ATOM_COMMON_TABLE_HEADER*)TABLE_HEADER_OFFSET)->ucTableFormatRevision)&0x3F)
 #define GET_COMMAND_TABLE_PARAMETER_REVISION(TABLE_HEADER_OFFSET)  ((((ATOM_COMMON_TABLE_HEADER*)TABLE_HEADER_OFFSET)->ucTableContentRevision)&0x3F)
+#endif // __cplusplus
 
 #define GET_DATA_TABLE_MAJOR_REVISION GET_COMMAND_TABLE_COMMANDSET_REVISION
 #define GET_DATA_TABLE_MINOR_REVISION GET_COMMAND_TABLE_PARAMETER_REVISION
 
-/****************************************************************************/
-/* Portion III: Definitinos only used in VBIOS */
+/****************************************************************************/	
+//Portion III: Definitinos only used in VBIOS
 /****************************************************************************/
 #define ATOM_DAC_SRC					0x80
 #define ATOM_SRC_DAC1					0
 #define ATOM_SRC_DAC2					0x80
 
-#ifdef	UEFI_BUILD
-#define	USHORT	UTEMP
-#endif
-
-typedef struct _MEMORY_PLLINIT_PARAMETERS {
-	ULONG ulTargetMemoryClock;	/* In 10Khz unit */
-	UCHAR ucAction;		/* not define yet */
-	UCHAR ucFbDiv_Hi;	/* Fbdiv Hi byte */
-	UCHAR ucFbDiv;		/* FB value */
-	UCHAR ucPostDiv;	/* Post div */
-} MEMORY_PLLINIT_PARAMETERS;
+typedef struct _MEMORY_PLLINIT_PARAMETERS
+{
+  ULONG ulTargetMemoryClock; //In 10Khz unit
+  UCHAR   ucAction;					 //not define yet
+  UCHAR   ucFbDiv_Hi;				 //Fbdiv Hi byte
+  UCHAR   ucFbDiv;					 //FB value
+  UCHAR   ucPostDiv;				 //Post div
+}MEMORY_PLLINIT_PARAMETERS;
 
 #define MEMORY_PLLINIT_PS_ALLOCATION  MEMORY_PLLINIT_PARAMETERS
 
-#define	GPIO_PIN_WRITE													0x01
+
+#define	GPIO_PIN_WRITE													0x01			
 #define	GPIO_PIN_READ														0x00
 
-typedef struct _GPIO_PIN_CONTROL_PARAMETERS {
-	UCHAR ucGPIO_ID;	/* return value, read from GPIO pins */
-	UCHAR ucGPIOBitShift;	/* define which bit in uGPIOBitVal need to be update */
-	UCHAR ucGPIOBitVal;	/* Set/Reset corresponding bit defined in ucGPIOBitMask */
-	UCHAR ucAction;		/* =GPIO_PIN_WRITE: Read; =GPIO_PIN_READ: Write */
-} GPIO_PIN_CONTROL_PARAMETERS;
+typedef struct  _GPIO_PIN_CONTROL_PARAMETERS
+{
+  UCHAR ucGPIO_ID;           //return value, read from GPIO pins
+  UCHAR ucGPIOBitShift;	     //define which bit in uGPIOBitVal need to be update 
+	UCHAR ucGPIOBitVal;		     //Set/Reset corresponding bit defined in ucGPIOBitMask
+  UCHAR ucAction;				     //=GPIO_PIN_WRITE: Read; =GPIO_PIN_READ: Write
+}GPIO_PIN_CONTROL_PARAMETERS;
 
-typedef struct _ENABLE_SCALER_PARAMETERS {
-	UCHAR ucScaler;		/*  ATOM_SCALER1, ATOM_SCALER2 */
-	UCHAR ucEnable;		/*  ATOM_SCALER_DISABLE or ATOM_SCALER_CENTER or ATOM_SCALER_EXPANSION */
-	UCHAR ucTVStandard;	/*  */
-	UCHAR ucPadding[1];
-} ENABLE_SCALER_PARAMETERS;
-#define ENABLE_SCALER_PS_ALLOCATION ENABLE_SCALER_PARAMETERS
+typedef struct _ENABLE_SCALER_PARAMETERS
+{
+  UCHAR ucScaler;            // ATOM_SCALER1, ATOM_SCALER2
+  UCHAR ucEnable;            // ATOM_SCALER_DISABLE or ATOM_SCALER_CENTER or ATOM_SCALER_EXPANSION
+  UCHAR ucTVStandard;        // 
+  UCHAR ucPadding[1];
+}ENABLE_SCALER_PARAMETERS; 
+#define ENABLE_SCALER_PS_ALLOCATION ENABLE_SCALER_PARAMETERS 
 
-/* ucEnable: */
+//ucEnable:
 #define SCALER_BYPASS_AUTO_CENTER_NO_REPLICATION    0
 #define SCALER_BYPASS_AUTO_CENTER_AUTO_REPLICATION  1
 #define SCALER_ENABLE_2TAP_ALPHA_MODE               2
 #define SCALER_ENABLE_MULTITAP_MODE                 3
 
-typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS {
-	ULONG usHWIconHorzVertPosn;	/*  Hardware Icon Vertical position */
-	UCHAR ucHWIconVertOffset;	/*  Hardware Icon Vertical offset */
-	UCHAR ucHWIconHorzOffset;	/*  Hardware Icon Horizontal offset */
-	UCHAR ucSelection;	/*  ATOM_CURSOR1 or ATOM_ICON1 or ATOM_CURSOR2 or ATOM_ICON2 */
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-} ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS;
+typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS
+{
+  ULONG  usHWIconHorzVertPosn;        // Hardware Icon Vertical position
+  UCHAR  ucHWIconVertOffset;          // Hardware Icon Vertical offset
+  UCHAR  ucHWIconHorzOffset;          // Hardware Icon Horizontal offset
+  UCHAR  ucSelection;                 // ATOM_CURSOR1 or ATOM_ICON1 or ATOM_CURSOR2 or ATOM_ICON2
+  UCHAR  ucEnable;                    // ATOM_ENABLE or ATOM_DISABLE
+}ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS;
 
-typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION {
-	ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS sEnableIcon;
-	ENABLE_CRTC_PARAMETERS sReserved;
-} ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION;
+typedef struct _ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION
+{
+  ENABLE_HARDWARE_ICON_CURSOR_PARAMETERS  sEnableIcon;
+  ENABLE_CRTC_PARAMETERS                  sReserved;  
+}ENABLE_HARDWARE_ICON_CURSOR_PS_ALLOCATION;
 
-typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS {
-	USHORT usHight;		/*  Image Hight */
-	USHORT usWidth;		/*  Image Width */
-	UCHAR ucSurface;	/*  Surface 1 or 2 */
-	UCHAR ucPadding[3];
-} ENABLE_GRAPH_SURFACE_PARAMETERS;
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS
+{
+  USHORT usHight;                     // Image Hight
+  USHORT usWidth;                     // Image Width
+  UCHAR  ucSurface;                   // Surface 1 or 2	
+  UCHAR  ucPadding[3];
+}ENABLE_GRAPH_SURFACE_PARAMETERS;
 
-typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2 {
-	USHORT usHight;		/*  Image Hight */
-	USHORT usWidth;		/*  Image Width */
-	UCHAR ucSurface;	/*  Surface 1 or 2 */
-	UCHAR ucEnable;		/*  ATOM_ENABLE or ATOM_DISABLE */
-	UCHAR ucPadding[2];
-} ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2;
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2
+{
+  USHORT usHight;                     // Image Hight
+  USHORT usWidth;                     // Image Width
+  UCHAR  ucSurface;                   // Surface 1 or 2
+  UCHAR  ucEnable;                    // ATOM_ENABLE or ATOM_DISABLE
+  UCHAR  ucPadding[2];
+}ENABLE_GRAPH_SURFACE_PARAMETERS_V1_2;
 
-typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION {
-	ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface;
-	ENABLE_YUV_PS_ALLOCATION sReserved;	/*  Don't set this one */
-} ENABLE_GRAPH_SURFACE_PS_ALLOCATION;
+typedef struct _ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3
+{
+  USHORT usHight;                     // Image Hight
+  USHORT usWidth;                     // Image Width
+  UCHAR  ucSurface;                   // Surface 1 or 2
+  UCHAR  ucEnable;                    // ATOM_ENABLE or ATOM_DISABLE
+  USHORT usDeviceId;                  // Active Device Id for this surface. If no device, set to 0. 
+}ENABLE_GRAPH_SURFACE_PARAMETERS_V1_3;
 
-typedef struct _MEMORY_CLEAN_UP_PARAMETERS {
-	USHORT usMemoryStart;	/* in 8Kb boundry, offset from memory base address */
-	USHORT usMemorySize;	/* 8Kb blocks aligned */
-} MEMORY_CLEAN_UP_PARAMETERS;
+typedef struct _ENABLE_GRAPH_SURFACE_PS_ALLOCATION
+{
+  ENABLE_GRAPH_SURFACE_PARAMETERS sSetSurface;          
+  ENABLE_YUV_PS_ALLOCATION        sReserved; // Don't set this one
+}ENABLE_GRAPH_SURFACE_PS_ALLOCATION;
+
+typedef struct _MEMORY_CLEAN_UP_PARAMETERS
+{
+  USHORT  usMemoryStart;                //in 8Kb boundry, offset from memory base address
+  USHORT  usMemorySize;                 //8Kb blocks aligned
+}MEMORY_CLEAN_UP_PARAMETERS;
 #define MEMORY_CLEAN_UP_PS_ALLOCATION MEMORY_CLEAN_UP_PARAMETERS
 
-typedef struct _GET_DISPLAY_SURFACE_SIZE_PARAMETERS {
-	USHORT usX_Size;	/* When use as input parameter, usX_Size indicates which CRTC */
-	USHORT usY_Size;
-} GET_DISPLAY_SURFACE_SIZE_PARAMETERS;
+typedef struct  _GET_DISPLAY_SURFACE_SIZE_PARAMETERS
+{
+  USHORT  usX_Size;                     //When use as input parameter, usX_Size indicates which CRTC                 
+  USHORT  usY_Size;
+}GET_DISPLAY_SURFACE_SIZE_PARAMETERS; 
 
-typedef struct _INDIRECT_IO_ACCESS {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR IOAccessSequence[256];
+typedef struct _INDIRECT_IO_ACCESS
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  UCHAR                    IOAccessSequence[256];
 } INDIRECT_IO_ACCESS;
 
 #define INDIRECT_READ              0x00
@@ -3615,93 +4414,108 @@
 #define INDIRECT_IO_NBMISC_READ    INDIRECT_IO_NBMISC | INDIRECT_READ
 #define INDIRECT_IO_NBMISC_WRITE   INDIRECT_IO_NBMISC | INDIRECT_WRITE
 
-typedef struct _ATOM_OEM_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
-} ATOM_OEM_INFO;
+typedef struct _ATOM_OEM_INFO
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
+}ATOM_OEM_INFO;
 
-typedef struct _ATOM_TV_MODE {
-	UCHAR ucVMode_Num;	/* Video mode number */
-	UCHAR ucTV_Mode_Num;	/* Internal TV mode number */
-} ATOM_TV_MODE;
+typedef struct _ATOM_TV_MODE
+{
+   UCHAR	ucVMode_Num;			  //Video mode number
+   UCHAR	ucTV_Mode_Num;			//Internal TV mode number
+}ATOM_TV_MODE;
 
-typedef struct _ATOM_BIOS_INT_TVSTD_MODE {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usTV_Mode_LUT_Offset;	/*  Pointer to standard to internal number conversion table */
-	USHORT usTV_FIFO_Offset;	/*  Pointer to FIFO entry table */
-	USHORT usNTSC_Tbl_Offset;	/*  Pointer to SDTV_Mode_NTSC table */
-	USHORT usPAL_Tbl_Offset;	/*  Pointer to SDTV_Mode_PAL table */
-	USHORT usCV_Tbl_Offset;	/*  Pointer to SDTV_Mode_PAL table */
-} ATOM_BIOS_INT_TVSTD_MODE;
+typedef struct _ATOM_BIOS_INT_TVSTD_MODE
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+   USHORT	usTV_Mode_LUT_Offset;	// Pointer to standard to internal number conversion table
+   USHORT	usTV_FIFO_Offset;		  // Pointer to FIFO entry table
+   USHORT	usNTSC_Tbl_Offset;		// Pointer to SDTV_Mode_NTSC table
+   USHORT	usPAL_Tbl_Offset;		  // Pointer to SDTV_Mode_PAL table 
+   USHORT	usCV_Tbl_Offset;		  // Pointer to SDTV_Mode_PAL table 
+}ATOM_BIOS_INT_TVSTD_MODE;
 
-typedef struct _ATOM_TV_MODE_SCALER_PTR {
-	USHORT ucFilter0_Offset;	/* Pointer to filter format 0 coefficients */
-	USHORT usFilter1_Offset;	/* Pointer to filter format 0 coefficients */
-	UCHAR ucTV_Mode_Num;
-} ATOM_TV_MODE_SCALER_PTR;
 
-typedef struct _ATOM_STANDARD_VESA_TIMING {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_DTD_FORMAT aModeTimings[16];	/*  16 is not the real array number, just for initial allocation */
-} ATOM_STANDARD_VESA_TIMING;
+typedef struct _ATOM_TV_MODE_SCALER_PTR
+{
+   USHORT	ucFilter0_Offset;		//Pointer to filter format 0 coefficients
+   USHORT	usFilter1_Offset;		//Pointer to filter format 0 coefficients
+   UCHAR	ucTV_Mode_Num;
+}ATOM_TV_MODE_SCALER_PTR;
 
-typedef struct _ATOM_STD_FORMAT {
-	USHORT usSTD_HDisp;
-	USHORT usSTD_VDisp;
-	USHORT usSTD_RefreshRate;
-	USHORT usReserved;
-} ATOM_STD_FORMAT;
+typedef struct _ATOM_STANDARD_VESA_TIMING
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  ATOM_DTD_FORMAT 				 aModeTimings[16];      // 16 is not the real array number, just for initial allocation
+}ATOM_STANDARD_VESA_TIMING;
 
-typedef struct _ATOM_VESA_TO_EXTENDED_MODE {
-	USHORT usVESA_ModeNumber;
-	USHORT usExtendedModeNumber;
-} ATOM_VESA_TO_EXTENDED_MODE;
 
-typedef struct _ATOM_VESA_TO_INTENAL_MODE_LUT {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	ATOM_VESA_TO_EXTENDED_MODE asVESA_ToExtendedModeInfo[76];
-} ATOM_VESA_TO_INTENAL_MODE_LUT;
+typedef struct _ATOM_STD_FORMAT
+{ 
+  USHORT    usSTD_HDisp;
+  USHORT    usSTD_VDisp;
+  USHORT    usSTD_RefreshRate;
+  USHORT    usReserved;
+}ATOM_STD_FORMAT;
+
+typedef struct _ATOM_VESA_TO_EXTENDED_MODE
+{
+  USHORT  usVESA_ModeNumber;
+  USHORT  usExtendedModeNumber;
+}ATOM_VESA_TO_EXTENDED_MODE;
+
+typedef struct _ATOM_VESA_TO_INTENAL_MODE_LUT
+{ 
+  ATOM_COMMON_TABLE_HEADER   sHeader;  
+  ATOM_VESA_TO_EXTENDED_MODE asVESA_ToExtendedModeInfo[76];
+}ATOM_VESA_TO_INTENAL_MODE_LUT;
 
 /*************** ATOM Memory Related Data Structure ***********************/
-typedef struct _ATOM_MEMORY_VENDOR_BLOCK {
-	UCHAR ucMemoryType;
-	UCHAR ucMemoryVendor;
-	UCHAR ucAdjMCId;
-	UCHAR ucDynClkId;
-	ULONG ulDllResetClkRange;
-} ATOM_MEMORY_VENDOR_BLOCK;
+typedef struct _ATOM_MEMORY_VENDOR_BLOCK{
+	UCHAR												ucMemoryType;
+	UCHAR												ucMemoryVendor;
+	UCHAR												ucAdjMCId;
+	UCHAR												ucDynClkId;
+	ULONG												ulDllResetClkRange;
+}ATOM_MEMORY_VENDOR_BLOCK;
 
-typedef struct _ATOM_MEMORY_SETTING_ID_CONFIG {
+
+typedef struct _ATOM_MEMORY_SETTING_ID_CONFIG{
 #if ATOM_BIG_ENDIAN
-	ULONG ucMemBlkId:8;
-	ULONG ulMemClockRange:24;
+	ULONG												ucMemBlkId:8;
+	ULONG												ulMemClockRange:24;
 #else
-	ULONG ulMemClockRange:24;
-	ULONG ucMemBlkId:8;
+	ULONG												ulMemClockRange:24;
+	ULONG												ucMemBlkId:8;
 #endif
-} ATOM_MEMORY_SETTING_ID_CONFIG;
+}ATOM_MEMORY_SETTING_ID_CONFIG;
 
-typedef union _ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS {
-	ATOM_MEMORY_SETTING_ID_CONFIG slAccess;
-	ULONG ulAccess;
-} ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS;
+typedef union _ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS
+{
+  ATOM_MEMORY_SETTING_ID_CONFIG slAccess;
+  ULONG                         ulAccess;
+}ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS;
 
-typedef struct _ATOM_MEMORY_SETTING_DATA_BLOCK {
-	ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS ulMemoryID;
-	ULONG aulMemData[1];
-} ATOM_MEMORY_SETTING_DATA_BLOCK;
 
-typedef struct _ATOM_INIT_REG_INDEX_FORMAT {
-	USHORT usRegIndex;	/*  MC register index */
-	UCHAR ucPreRegDataLength;	/*  offset in ATOM_INIT_REG_DATA_BLOCK.saRegDataBuf */
-} ATOM_INIT_REG_INDEX_FORMAT;
+typedef struct _ATOM_MEMORY_SETTING_DATA_BLOCK{
+	ATOM_MEMORY_SETTING_ID_CONFIG_ACCESS			ulMemoryID;
+	ULONG															        aulMemData[1];
+}ATOM_MEMORY_SETTING_DATA_BLOCK;
 
-typedef struct _ATOM_INIT_REG_BLOCK {
-	USHORT usRegIndexTblSize;	/* size of asRegIndexBuf */
-	USHORT usRegDataBlkSize;	/* size of ATOM_MEMORY_SETTING_DATA_BLOCK */
-	ATOM_INIT_REG_INDEX_FORMAT asRegIndexBuf[1];
-	ATOM_MEMORY_SETTING_DATA_BLOCK asRegDataBuf[1];
-} ATOM_INIT_REG_BLOCK;
+
+typedef struct _ATOM_INIT_REG_INDEX_FORMAT{
+	 USHORT											usRegIndex;                                     // MC register index
+	 UCHAR											ucPreRegDataLength;                             // offset in ATOM_INIT_REG_DATA_BLOCK.saRegDataBuf
+}ATOM_INIT_REG_INDEX_FORMAT;
+
+
+typedef struct _ATOM_INIT_REG_BLOCK{
+	USHORT													usRegIndexTblSize;													//size of asRegIndexBuf
+	USHORT													usRegDataBlkSize;														//size of ATOM_MEMORY_SETTING_DATA_BLOCK
+	ATOM_INIT_REG_INDEX_FORMAT			asRegIndexBuf[1];
+	ATOM_MEMORY_SETTING_DATA_BLOCK	asRegDataBuf[1];
+}ATOM_INIT_REG_BLOCK;
 
 #define END_OF_REG_INDEX_BLOCK  0x0ffff
 #define END_OF_REG_DATA_BLOCK   0x00000000
@@ -3716,16 +4530,19 @@
 #define INDEX_ACCESS_RANGE_END		    (INDEX_ACCESS_RANGE_BEGIN + 1)
 #define VALUE_INDEX_ACCESS_SINGLE	    (INDEX_ACCESS_RANGE_END + 1)
 
-typedef struct _ATOM_MC_INIT_PARAM_TABLE {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usAdjustARB_SEQDataOffset;
-	USHORT usMCInitMemTypeTblOffset;
-	USHORT usMCInitCommonTblOffset;
-	USHORT usMCInitPowerDownTblOffset;
-	ULONG ulARB_SEQDataBuf[32];
-	ATOM_INIT_REG_BLOCK asMCInitMemType;
-	ATOM_INIT_REG_BLOCK asMCInitCommon;
-} ATOM_MC_INIT_PARAM_TABLE;
+
+typedef struct _ATOM_MC_INIT_PARAM_TABLE
+{ 
+  ATOM_COMMON_TABLE_HEADER		sHeader;
+  USHORT											usAdjustARB_SEQDataOffset;
+  USHORT											usMCInitMemTypeTblOffset;
+  USHORT											usMCInitCommonTblOffset;
+  USHORT											usMCInitPowerDownTblOffset;
+	ULONG												ulARB_SEQDataBuf[32];
+	ATOM_INIT_REG_BLOCK					asMCInitMemType;
+	ATOM_INIT_REG_BLOCK					asMCInitCommon;
+}ATOM_MC_INIT_PARAM_TABLE;
+
 
 #define _4Mx16              0x2
 #define _4Mx32              0x3
@@ -3751,221 +4568,272 @@
 
 #define QIMONDA             INFINEON
 #define PROMOS              MOSEL
+#define KRETON              INFINEON
 
-/* ///////////Support for GDDR5 MC uCode to reside in upper 64K of ROM///////////// */
+/////////////Support for GDDR5 MC uCode to reside in upper 64K of ROM/////////////
 
 #define UCODE_ROM_START_ADDRESS		0x1c000
-#define	UCODE_SIGNATURE			0x4375434d	/*  'MCuC' - MC uCode */
+#define	UCODE_SIGNATURE			0x4375434d // 'MCuC' - MC uCode
 
-/* uCode block header for reference */
+//uCode block header for reference
 
-typedef struct _MCuCodeHeader {
-	ULONG ulSignature;
-	UCHAR ucRevision;
-	UCHAR ucChecksum;
-	UCHAR ucReserved1;
-	UCHAR ucReserved2;
-	USHORT usParametersLength;
-	USHORT usUCodeLength;
-	USHORT usReserved1;
-	USHORT usReserved2;
+typedef struct _MCuCodeHeader
+{
+  ULONG  ulSignature;
+  UCHAR  ucRevision;
+  UCHAR  ucChecksum;
+  UCHAR  ucReserved1;
+  UCHAR  ucReserved2;
+  USHORT usParametersLength;
+  USHORT usUCodeLength;
+  USHORT usReserved1;
+  USHORT usReserved2;
 } MCuCodeHeader;
 
-/* //////////////////////////////////////////////////////////////////////////////// */
+//////////////////////////////////////////////////////////////////////////////////
 
 #define ATOM_MAX_NUMBER_OF_VRAM_MODULE	16
 
 #define ATOM_VRAM_MODULE_MEMORY_VENDOR_ID_MASK	0xF
-typedef struct _ATOM_VRAM_MODULE_V1 {
-	ULONG ulReserved;
-	USHORT usEMRSValue;
-	USHORT usMRSValue;
-	USHORT usReserved;
-	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
-	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] reserved; */
-	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender */
-	UCHAR ucMemoryDeviceCfg;	/*  [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32... */
-	UCHAR ucRow;		/*  Number of Row,in power of 2; */
-	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
-	UCHAR ucBank;		/*  Nunber of Bank; */
-	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
-	UCHAR ucChannelNum;	/*  Number of channel; */
-	UCHAR ucChannelConfig;	/*  [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2 */
-	UCHAR ucDefaultMVDDQ_ID;	/*  Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data; */
-	UCHAR ucDefaultMVDDC_ID;	/*  Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data; */
-	UCHAR ucReserved[2];
-} ATOM_VRAM_MODULE_V1;
+typedef struct _ATOM_VRAM_MODULE_V1
+{
+  ULONG                      ulReserved;
+  USHORT                     usEMRSValue;  
+  USHORT                     usMRSValue;
+  USHORT                     usReserved;
+  UCHAR                      ucExtMemoryID;     // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR                      ucMemoryType;      // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] reserved;
+  UCHAR                      ucMemoryVenderID;  // Predefined,never change across designs or memory type/vender 
+  UCHAR                      ucMemoryDeviceCfg; // [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32...
+  UCHAR                      ucRow;             // Number of Row,in power of 2;
+  UCHAR                      ucColumn;          // Number of Column,in power of 2;
+  UCHAR                      ucBank;            // Nunber of Bank;
+  UCHAR                      ucRank;            // Number of Rank, in power of 2
+  UCHAR                      ucChannelNum;      // Number of channel;
+  UCHAR                      ucChannelConfig;   // [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2
+  UCHAR                      ucDefaultMVDDQ_ID; // Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data;
+  UCHAR                      ucDefaultMVDDC_ID; // Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data;
+  UCHAR                      ucReserved[2];
+}ATOM_VRAM_MODULE_V1;
 
-typedef struct _ATOM_VRAM_MODULE_V2 {
-	ULONG ulReserved;
-	ULONG ulFlags;		/*  To enable/disable functionalities based on memory type */
-	ULONG ulEngineClock;	/*  Override of default engine clock for particular memory type */
-	ULONG ulMemoryClock;	/*  Override of default memory clock for particular memory type */
-	USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
-	USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
-	USHORT usEMRSValue;
-	USHORT usMRSValue;
-	USHORT usReserved;
-	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
-	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now; */
-	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed */
-	UCHAR ucMemoryDeviceCfg;	/*  [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32... */
-	UCHAR ucRow;		/*  Number of Row,in power of 2; */
-	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
-	UCHAR ucBank;		/*  Nunber of Bank; */
-	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
-	UCHAR ucChannelNum;	/*  Number of channel; */
-	UCHAR ucChannelConfig;	/*  [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2 */
-	UCHAR ucDefaultMVDDQ_ID;	/*  Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data; */
-	UCHAR ucDefaultMVDDC_ID;	/*  Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data; */
-	UCHAR ucRefreshRateFactor;
-	UCHAR ucReserved[3];
-} ATOM_VRAM_MODULE_V2;
 
-typedef struct _ATOM_MEMORY_TIMING_FORMAT {
-	ULONG ulClkRange;	/*  memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing */
-	union {
-		USHORT usMRS;	/*  mode register */
-		USHORT usDDR3_MR0;
-	};
-	union {
-		USHORT usEMRS;	/*  extended mode register */
-		USHORT usDDR3_MR1;
-	};
-	UCHAR ucCL;		/*  CAS latency */
-	UCHAR ucWL;		/*  WRITE Latency */
-	UCHAR uctRAS;		/*  tRAS */
-	UCHAR uctRC;		/*  tRC */
-	UCHAR uctRFC;		/*  tRFC */
-	UCHAR uctRCDR;		/*  tRCDR */
-	UCHAR uctRCDW;		/*  tRCDW */
-	UCHAR uctRP;		/*  tRP */
-	UCHAR uctRRD;		/*  tRRD */
-	UCHAR uctWR;		/*  tWR */
-	UCHAR uctWTR;		/*  tWTR */
-	UCHAR uctPDIX;		/*  tPDIX */
-	UCHAR uctFAW;		/*  tFAW */
-	UCHAR uctAOND;		/*  tAOND */
-	union {
-		struct {
-			UCHAR ucflag;	/*  flag to control memory timing calculation. bit0= control EMRS2 Infineon */
-			UCHAR ucReserved;
-		};
-		USHORT usDDR3_MR2;
-	};
-} ATOM_MEMORY_TIMING_FORMAT;
+typedef struct _ATOM_VRAM_MODULE_V2
+{
+  ULONG                      ulReserved;
+  ULONG                      ulFlags;     			// To enable/disable functionalities based on memory type
+  ULONG                      ulEngineClock;     // Override of default engine clock for particular memory type
+  ULONG                      ulMemoryClock;     // Override of default memory clock for particular memory type
+  USHORT                     usEMRS2Value;      // EMRS2 Value is used for GDDR2 and GDDR4 memory type
+  USHORT                     usEMRS3Value;      // EMRS3 Value is used for GDDR2 and GDDR4 memory type
+  USHORT                     usEMRSValue;  
+  USHORT                     usMRSValue;
+  USHORT                     usReserved;
+  UCHAR                      ucExtMemoryID;     // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR                      ucMemoryType;      // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now;
+  UCHAR                      ucMemoryVenderID;  // Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed
+  UCHAR                      ucMemoryDeviceCfg; // [7:4]=0x0:4M;=0x1:8M;=0x2:16M;0x3:32M....[3:0]=0x0:x4;=0x1:x8;=0x2:x16;=0x3:x32...
+  UCHAR                      ucRow;             // Number of Row,in power of 2;
+  UCHAR                      ucColumn;          // Number of Column,in power of 2;
+  UCHAR                      ucBank;            // Nunber of Bank;
+  UCHAR                      ucRank;            // Number of Rank, in power of 2
+  UCHAR                      ucChannelNum;      // Number of channel;
+  UCHAR                      ucChannelConfig;   // [3:0]=Indication of what channel combination;[4:7]=Channel bit width, in number of 2
+  UCHAR                      ucDefaultMVDDQ_ID; // Default MVDDQ setting for this memory block, ID linking to MVDDQ info table to find real set-up data;
+  UCHAR                      ucDefaultMVDDC_ID; // Default MVDDC setting for this memory block, ID linking to MVDDC info table to find real set-up data;
+  UCHAR                      ucRefreshRateFactor;
+  UCHAR                      ucReserved[3];
+}ATOM_VRAM_MODULE_V2;
 
-typedef struct _ATOM_MEMORY_TIMING_FORMAT_V1 {
-	ULONG ulClkRange;	/*  memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing */
-	USHORT usMRS;		/*  mode register */
-	USHORT usEMRS;		/*  extended mode register */
-	UCHAR ucCL;		/*  CAS latency */
-	UCHAR ucWL;		/*  WRITE Latency */
-	UCHAR uctRAS;		/*  tRAS */
-	UCHAR uctRC;		/*  tRC */
-	UCHAR uctRFC;		/*  tRFC */
-	UCHAR uctRCDR;		/*  tRCDR */
-	UCHAR uctRCDW;		/*  tRCDW */
-	UCHAR uctRP;		/*  tRP */
-	UCHAR uctRRD;		/*  tRRD */
-	UCHAR uctWR;		/*  tWR */
-	UCHAR uctWTR;		/*  tWTR */
-	UCHAR uctPDIX;		/*  tPDIX */
-	UCHAR uctFAW;		/*  tFAW */
-	UCHAR uctAOND;		/*  tAOND */
-	UCHAR ucflag;		/*  flag to control memory timing calculation. bit0= control EMRS2 Infineon */
-/* ///////////////////////GDDR parameters/////////////////////////////////// */
-	UCHAR uctCCDL;		/*  */
-	UCHAR uctCRCRL;		/*  */
-	UCHAR uctCRCWL;		/*  */
-	UCHAR uctCKE;		/*  */
-	UCHAR uctCKRSE;		/*  */
-	UCHAR uctCKRSX;		/*  */
-	UCHAR uctFAW32;		/*  */
-	UCHAR ucReserved1;	/*  */
-	UCHAR ucReserved2;	/*  */
-	UCHAR ucTerminator;
-} ATOM_MEMORY_TIMING_FORMAT_V1;
 
-typedef struct _ATOM_MEMORY_FORMAT {
-	ULONG ulDllDisClock;	/*  memory DLL will be disable when target memory clock is below this clock */
-	union {
-		USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
-		USHORT usDDR3_Reserved;	/*  Not used for DDR3 memory */
-	};
-	union {
-		USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
-		USHORT usDDR3_MR3;	/*  Used for DDR3 memory */
-	};
-	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now; */
-	UCHAR ucMemoryVenderID;	/*  Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed */
-	UCHAR ucRow;		/*  Number of Row,in power of 2; */
-	UCHAR ucColumn;		/*  Number of Column,in power of 2; */
-	UCHAR ucBank;		/*  Nunber of Bank; */
-	UCHAR ucRank;		/*  Number of Rank, in power of 2 */
-	UCHAR ucBurstSize;	/*  burst size, 0= burst size=4  1= burst size=8 */
-	UCHAR ucDllDisBit;	/*  position of DLL Enable/Disable bit in EMRS ( Extended Mode Register ) */
-	UCHAR ucRefreshRateFactor;	/*  memory refresh rate in unit of ms */
-	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
-	UCHAR ucPreamble;	/* [7:4] Write Preamble, [3:0] Read Preamble */
-	UCHAR ucMemAttrib;	/*  Memory Device Addribute, like RDBI/WDBI etc */
-	ATOM_MEMORY_TIMING_FORMAT asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
-} ATOM_MEMORY_FORMAT;
+typedef	struct _ATOM_MEMORY_TIMING_FORMAT
+{
+	ULONG											 ulClkRange;				// memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing 	
+  union{
+	  USHORT										 usMRS;							// mode register						
+    USHORT                     usDDR3_MR0;
+  };
+  union{
+	  USHORT										 usEMRS;						// extended mode register
+    USHORT                     usDDR3_MR1;
+  };
+	UCHAR											 ucCL;							// CAS latency
+	UCHAR											 ucWL;							// WRITE Latency				
+	UCHAR											 uctRAS;						// tRAS
+	UCHAR											 uctRC;							// tRC	
+	UCHAR											 uctRFC;						// tRFC
+	UCHAR											 uctRCDR;						// tRCDR	
+	UCHAR											 uctRCDW;						// tRCDW
+	UCHAR											 uctRP;							// tRP
+	UCHAR											 uctRRD;						// tRRD	
+	UCHAR											 uctWR;							// tWR
+	UCHAR											 uctWTR;						// tWTR
+	UCHAR											 uctPDIX;						// tPDIX
+	UCHAR											 uctFAW;						// tFAW
+	UCHAR											 uctAOND;						// tAOND
+  union 
+  {
+    struct {
+	    UCHAR											 ucflag;						// flag to control memory timing calculation. bit0= control EMRS2 Infineon 
+	    UCHAR											 ucReserved;						
+    };
+    USHORT                   usDDR3_MR2;
+  };
+}ATOM_MEMORY_TIMING_FORMAT;
 
-typedef struct _ATOM_VRAM_MODULE_V3 {
-	ULONG ulChannelMapCfg;	/*  board dependent paramenter:Channel combination */
-	USHORT usSize;		/*  size of ATOM_VRAM_MODULE_V3 */
-	USHORT usDefaultMVDDQ;	/*  board dependent parameter:Default Memory Core Voltage */
-	USHORT usDefaultMVDDC;	/*  board dependent parameter:Default Memory IO Voltage */
-	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
-	UCHAR ucChannelNum;	/*  board dependent parameter:Number of channel; */
-	UCHAR ucChannelSize;	/*  board dependent parameter:32bit or 64bit */
-	UCHAR ucVREFI;		/*  board dependnt parameter: EXT or INT +160mv to -140mv */
-	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
-	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
-	ATOM_MEMORY_FORMAT asMemory;	/*  describ all of video memory parameters from memory spec */
-} ATOM_VRAM_MODULE_V3;
 
-/* ATOM_VRAM_MODULE_V3.ucNPL_RT */
+typedef	struct _ATOM_MEMORY_TIMING_FORMAT_V1
+{
+	ULONG											 ulClkRange;				// memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing 	
+	USHORT										 usMRS;							// mode register						
+	USHORT										 usEMRS;						// extended mode register
+	UCHAR											 ucCL;							// CAS latency
+	UCHAR											 ucWL;							// WRITE Latency				
+	UCHAR											 uctRAS;						// tRAS
+	UCHAR											 uctRC;							// tRC	
+	UCHAR											 uctRFC;						// tRFC
+	UCHAR											 uctRCDR;						// tRCDR	
+	UCHAR											 uctRCDW;						// tRCDW
+	UCHAR											 uctRP;							// tRP
+	UCHAR											 uctRRD;						// tRRD	
+	UCHAR											 uctWR;							// tWR
+	UCHAR											 uctWTR;						// tWTR
+	UCHAR											 uctPDIX;						// tPDIX
+	UCHAR											 uctFAW;						// tFAW
+	UCHAR											 uctAOND;						// tAOND
+	UCHAR											 ucflag;						// flag to control memory timing calculation. bit0= control EMRS2 Infineon 
+////////////////////////////////////GDDR parameters///////////////////////////////////
+	UCHAR											 uctCCDL;						// 
+	UCHAR											 uctCRCRL;						// 
+	UCHAR											 uctCRCWL;						// 
+	UCHAR											 uctCKE;						// 
+	UCHAR											 uctCKRSE;						// 
+	UCHAR											 uctCKRSX;						// 
+	UCHAR											 uctFAW32;						// 
+	UCHAR											 ucMR5lo;					// 
+	UCHAR											 ucMR5hi;					// 
+	UCHAR											 ucTerminator;
+}ATOM_MEMORY_TIMING_FORMAT_V1;
+
+typedef	struct _ATOM_MEMORY_TIMING_FORMAT_V2
+{
+	ULONG											 ulClkRange;				// memory clock in 10kHz unit, when target memory clock is below this clock, use this memory timing 	
+	USHORT										 usMRS;							// mode register						
+	USHORT										 usEMRS;						// extended mode register
+	UCHAR											 ucCL;							// CAS latency
+	UCHAR											 ucWL;							// WRITE Latency				
+	UCHAR											 uctRAS;						// tRAS
+	UCHAR											 uctRC;							// tRC	
+	UCHAR											 uctRFC;						// tRFC
+	UCHAR											 uctRCDR;						// tRCDR	
+	UCHAR											 uctRCDW;						// tRCDW
+	UCHAR											 uctRP;							// tRP
+	UCHAR											 uctRRD;						// tRRD	
+	UCHAR											 uctWR;							// tWR
+	UCHAR											 uctWTR;						// tWTR
+	UCHAR											 uctPDIX;						// tPDIX
+	UCHAR											 uctFAW;						// tFAW
+	UCHAR											 uctAOND;						// tAOND
+	UCHAR											 ucflag;						// flag to control memory timing calculation. bit0= control EMRS2 Infineon 
+////////////////////////////////////GDDR parameters///////////////////////////////////
+	UCHAR											 uctCCDL;						// 
+	UCHAR											 uctCRCRL;						// 
+	UCHAR											 uctCRCWL;						// 
+	UCHAR											 uctCKE;						// 
+	UCHAR											 uctCKRSE;						// 
+	UCHAR											 uctCKRSX;						// 
+	UCHAR											 uctFAW32;						// 
+	UCHAR											 ucMR4lo;					// 
+	UCHAR											 ucMR4hi;					// 
+	UCHAR											 ucMR5lo;					// 
+	UCHAR											 ucMR5hi;					// 
+	UCHAR											 ucTerminator;
+	UCHAR											 ucReserved;	
+}ATOM_MEMORY_TIMING_FORMAT_V2;
+
+typedef	struct _ATOM_MEMORY_FORMAT
+{
+	ULONG											 ulDllDisClock;			// memory DLL will be disable when target memory clock is below this clock
+  union{
+    USHORT                     usEMRS2Value;      // EMRS2 Value is used for GDDR2 and GDDR4 memory type
+    USHORT                     usDDR3_Reserved;   // Not used for DDR3 memory
+  };
+  union{
+    USHORT                     usEMRS3Value;      // EMRS3 Value is used for GDDR2 and GDDR4 memory type
+    USHORT                     usDDR3_MR3;        // Used for DDR3 memory
+  };
+  UCHAR                      ucMemoryType;      // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4;[3:0] - must not be used for now;
+  UCHAR                      ucMemoryVenderID;  // Predefined,never change across designs or memory type/vender. If not predefined, vendor detection table gets executed
+  UCHAR                      ucRow;             // Number of Row,in power of 2;
+  UCHAR                      ucColumn;          // Number of Column,in power of 2;
+  UCHAR                      ucBank;            // Nunber of Bank;
+  UCHAR                      ucRank;            // Number of Rank, in power of 2
+	UCHAR											 ucBurstSize;				// burst size, 0= burst size=4  1= burst size=8
+  UCHAR                      ucDllDisBit;				// position of DLL Enable/Disable bit in EMRS ( Extended Mode Register )
+  UCHAR                      ucRefreshRateFactor;	// memory refresh rate in unit of ms	
+	UCHAR											 ucDensity;					// _8Mx32, _16Mx32, _16Mx16, _32Mx16
+	UCHAR											 ucPreamble;				//[7:4] Write Preamble, [3:0] Read Preamble
+  UCHAR											 ucMemAttrib;				// Memory Device Addribute, like RDBI/WDBI etc
+	ATOM_MEMORY_TIMING_FORMAT	 asMemTiming[5];		//Memory Timing block sort from lower clock to higher clock
+}ATOM_MEMORY_FORMAT;
+
+
+typedef struct _ATOM_VRAM_MODULE_V3
+{
+	ULONG											 ulChannelMapCfg;		// board dependent paramenter:Channel combination
+	USHORT										 usSize;						// size of ATOM_VRAM_MODULE_V3
+  USHORT                     usDefaultMVDDQ;		// board dependent parameter:Default Memory Core Voltage
+  USHORT                     usDefaultMVDDC;		// board dependent parameter:Default Memory IO Voltage
+	UCHAR                      ucExtMemoryID;     // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR                      ucChannelNum;      // board dependent parameter:Number of channel;
+	UCHAR											 ucChannelSize;			// board dependent parameter:32bit or 64bit	
+	UCHAR											 ucVREFI;						// board dependnt parameter: EXT or INT +160mv to -140mv
+	UCHAR											 ucNPL_RT;					// board dependent parameter:NPL round trip delay, used for calculate memory timing parameters
+	UCHAR											 ucFlag;						// To enable/disable functionalities based on memory type
+	ATOM_MEMORY_FORMAT				 asMemory;					// describ all of video memory parameters from memory spec
+}ATOM_VRAM_MODULE_V3;
+
+
+//ATOM_VRAM_MODULE_V3.ucNPL_RT
 #define NPL_RT_MASK															0x0f
 #define BATTERY_ODT_MASK												0xc0
 
 #define ATOM_VRAM_MODULE		 ATOM_VRAM_MODULE_V3
 
-typedef struct _ATOM_VRAM_MODULE_V4 {
-	ULONG ulChannelMapCfg;	/*  board dependent parameter: Channel combination */
-	USHORT usModuleSize;	/*  size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE */
-	USHORT usPrivateReserved;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
-	/*  MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) */
-	USHORT usReserved;
-	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
-	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now; */
-	UCHAR ucChannelNum;	/*  Number of channels present in this module config */
-	UCHAR ucChannelWidth;	/*  0 - 32 bits; 1 - 64 bits */
-	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
-	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
-	UCHAR ucMisc;		/*  bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8 */
-	UCHAR ucVREFI;		/*  board dependent parameter */
-	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
-	UCHAR ucPreamble;	/*  [7:4] Write Preamble, [3:0] Read Preamble */
-	UCHAR ucMemorySize;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
-	/*  Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros */
-	UCHAR ucReserved[3];
+typedef struct _ATOM_VRAM_MODULE_V4
+{
+  ULONG	  ulChannelMapCfg;	                // board dependent parameter: Channel combination
+  USHORT  usModuleSize;                     // size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE
+  USHORT  usPrivateReserved;                // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS)
+  USHORT  usReserved;
+  UCHAR   ucExtMemoryID;    		            // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR   ucMemoryType;                     // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now;
+  UCHAR   ucChannelNum;                     // Number of channels present in this module config
+  UCHAR   ucChannelWidth;                   // 0 - 32 bits; 1 - 64 bits
+	UCHAR   ucDensity;                        // _8Mx32, _16Mx32, _16Mx16, _32Mx16
+	UCHAR	  ucFlag;						                // To enable/disable functionalities based on memory type
+	UCHAR	  ucMisc;						                // bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8
+  UCHAR		ucVREFI;                          // board dependent parameter
+  UCHAR   ucNPL_RT;                         // board dependent parameter:NPL round trip delay, used for calculate memory timing parameters
+  UCHAR		ucPreamble;                       // [7:4] Write Preamble, [3:0] Read Preamble
+  UCHAR   ucMemorySize;                     // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros
+  UCHAR   ucReserved[3];
 
-/* compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level */
-	union {
-		USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
-		USHORT usDDR3_Reserved;
-	};
-	union {
-		USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
-		USHORT usDDR3_MR3;	/*  Used for DDR3 memory */
-	};
-	UCHAR ucMemoryVenderID;	/*  Predefined, If not predefined, vendor detection table gets executed */
-	UCHAR ucRefreshRateFactor;	/*  [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms) */
-	UCHAR ucReserved2[2];
-	ATOM_MEMORY_TIMING_FORMAT asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
-} ATOM_VRAM_MODULE_V4;
+//compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level
+  union{
+    USHORT	usEMRS2Value;                   // EMRS2 Value is used for GDDR2 and GDDR4 memory type
+    USHORT  usDDR3_Reserved;
+  };
+  union{
+    USHORT	usEMRS3Value;                   // EMRS3 Value is used for GDDR2 and GDDR4 memory type
+    USHORT  usDDR3_MR3;                     // Used for DDR3 memory
+  };  
+  UCHAR   ucMemoryVenderID;  		            // Predefined, If not predefined, vendor detection table gets executed
+  UCHAR	  ucRefreshRateFactor;              // [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms)
+  UCHAR   ucReserved2[2];
+  ATOM_MEMORY_TIMING_FORMAT  asMemTiming[5];//Memory Timing block sort from lower clock to higher clock
+}ATOM_VRAM_MODULE_V4;
 
 #define VRAM_MODULE_V4_MISC_RANK_MASK       0x3
 #define VRAM_MODULE_V4_MISC_DUAL_RANK       0x1
@@ -3973,96 +4841,139 @@
 #define VRAM_MODULE_V4_MISC_BL8             0x4
 #define VRAM_MODULE_V4_MISC_DUAL_CS         0x10
 
-typedef struct _ATOM_VRAM_MODULE_V5 {
-	ULONG ulChannelMapCfg;	/*  board dependent parameter: Channel combination */
-	USHORT usModuleSize;	/*  size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE */
-	USHORT usPrivateReserved;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
-	/*  MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS) */
-	USHORT usReserved;
-	UCHAR ucExtMemoryID;	/*  An external indicator (by hardcode, callback or pin) to tell what is the current memory module */
-	UCHAR ucMemoryType;	/*  [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now; */
-	UCHAR ucChannelNum;	/*  Number of channels present in this module config */
-	UCHAR ucChannelWidth;	/*  0 - 32 bits; 1 - 64 bits */
-	UCHAR ucDensity;	/*  _8Mx32, _16Mx32, _16Mx16, _32Mx16 */
-	UCHAR ucFlag;		/*  To enable/disable functionalities based on memory type */
-	UCHAR ucMisc;		/*  bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8 */
-	UCHAR ucVREFI;		/*  board dependent parameter */
-	UCHAR ucNPL_RT;		/*  board dependent parameter:NPL round trip delay, used for calculate memory timing parameters */
-	UCHAR ucPreamble;	/*  [7:4] Write Preamble, [3:0] Read Preamble */
-	UCHAR ucMemorySize;	/*  BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!! */
-	/*  Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros */
-	UCHAR ucReserved[3];
+typedef struct _ATOM_VRAM_MODULE_V5
+{
+  ULONG	  ulChannelMapCfg;	                // board dependent parameter: Channel combination
+  USHORT  usModuleSize;                     // size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE
+  USHORT  usPrivateReserved;                // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS)
+  USHORT  usReserved;
+  UCHAR   ucExtMemoryID;    		            // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR   ucMemoryType;                     // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now;
+  UCHAR   ucChannelNum;                     // Number of channels present in this module config
+  UCHAR   ucChannelWidth;                   // 0 - 32 bits; 1 - 64 bits
+	UCHAR   ucDensity;                        // _8Mx32, _16Mx32, _16Mx16, _32Mx16
+	UCHAR	  ucFlag;						                // To enable/disable functionalities based on memory type
+	UCHAR	  ucMisc;						                // bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8
+  UCHAR		ucVREFI;                          // board dependent parameter
+  UCHAR   ucNPL_RT;                         // board dependent parameter:NPL round trip delay, used for calculate memory timing parameters
+  UCHAR		ucPreamble;                       // [7:4] Write Preamble, [3:0] Read Preamble
+  UCHAR   ucMemorySize;                     // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros
+  UCHAR   ucReserved[3];
 
-/* compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level */
-	USHORT usEMRS2Value;	/*  EMRS2 Value is used for GDDR2 and GDDR4 memory type */
-	USHORT usEMRS3Value;	/*  EMRS3 Value is used for GDDR2 and GDDR4 memory type */
-	UCHAR ucMemoryVenderID;	/*  Predefined, If not predefined, vendor detection table gets executed */
-	UCHAR ucRefreshRateFactor;	/*  [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms) */
-	UCHAR ucFIFODepth;	/*  FIFO depth supposes to be detected during vendor detection, but if we dont do vendor detection we have to hardcode FIFO Depth */
-	UCHAR ucCDR_Bandwidth;	/*  [0:3]=Read CDR bandwidth, [4:7] - Write CDR Bandwidth */
-	ATOM_MEMORY_TIMING_FORMAT_V1 asMemTiming[5];	/* Memory Timing block sort from lower clock to higher clock */
-} ATOM_VRAM_MODULE_V5;
+//compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level
+  USHORT	usEMRS2Value;      		            // EMRS2 Value is used for GDDR2 and GDDR4 memory type
+  USHORT	usEMRS3Value;      		            // EMRS3 Value is used for GDDR2 and GDDR4 memory type
+  UCHAR   ucMemoryVenderID;  		            // Predefined, If not predefined, vendor detection table gets executed
+  UCHAR	  ucRefreshRateFactor;              // [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms)
+  UCHAR	  ucFIFODepth;			                // FIFO depth supposes to be detected during vendor detection, but if we dont do vendor detection we have to hardcode FIFO Depth
+  UCHAR   ucCDR_Bandwidth;		   // [0:3]=Read CDR bandwidth, [4:7] - Write CDR Bandwidth
+  ATOM_MEMORY_TIMING_FORMAT_V1  asMemTiming[5];//Memory Timing block sort from lower clock to higher clock
+}ATOM_VRAM_MODULE_V5;
 
-typedef struct _ATOM_VRAM_INFO_V2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucNumOfVRAMModule;
-	ATOM_VRAM_MODULE aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
-} ATOM_VRAM_INFO_V2;
+typedef struct _ATOM_VRAM_MODULE_V6
+{
+  ULONG	  ulChannelMapCfg;	                // board dependent parameter: Channel combination
+  USHORT  usModuleSize;                     // size of ATOM_VRAM_MODULE_V4, make it easy for VBIOS to look for next entry of VRAM_MODULE
+  USHORT  usPrivateReserved;                // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // MC_ARB_RAMCFG (includes NOOFBANK,NOOFRANKS,NOOFROWS,NOOFCOLS)
+  USHORT  usReserved;
+  UCHAR   ucExtMemoryID;    		            // An external indicator (by hardcode, callback or pin) to tell what is the current memory module
+  UCHAR   ucMemoryType;                     // [7:4]=0x1:DDR1;=0x2:DDR2;=0x3:DDR3;=0x4:DDR4; 0x5:DDR5 [3:0] - Must be 0x0 for now;
+  UCHAR   ucChannelNum;                     // Number of channels present in this module config
+  UCHAR   ucChannelWidth;                   // 0 - 32 bits; 1 - 64 bits
+	UCHAR   ucDensity;                        // _8Mx32, _16Mx32, _16Mx16, _32Mx16
+	UCHAR	  ucFlag;						                // To enable/disable functionalities based on memory type
+	UCHAR	  ucMisc;						                // bit0: 0 - single rank; 1 - dual rank;   bit2: 0 - burstlength 4, 1 - burstlength 8
+  UCHAR		ucVREFI;                          // board dependent parameter
+  UCHAR   ucNPL_RT;                         // board dependent parameter:NPL round trip delay, used for calculate memory timing parameters
+  UCHAR		ucPreamble;                       // [7:4] Write Preamble, [3:0] Read Preamble
+  UCHAR   ucMemorySize;                     // BIOS internal reserved space to optimize code size, updated by the compiler, shouldn't be modified manually!!
+                                            // Total memory size in unit of 16MB for CONFIG_MEMSIZE - bit[23:0] zeros
+  UCHAR   ucReserved[3];
 
-typedef struct _ATOM_VRAM_INFO_V3 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usMemAdjustTblOffset;	/*  offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting */
-	USHORT usMemClkPatchTblOffset;	/*      offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting */
-	USHORT usRerseved;
-	UCHAR aVID_PinsShift[9];	/*  8 bit strap maximum+terminator */
-	UCHAR ucNumOfVRAMModule;
-	ATOM_VRAM_MODULE aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
-	ATOM_INIT_REG_BLOCK asMemPatch;	/*  for allocation */
-	/*      ATOM_INIT_REG_BLOCK                              aMemAdjust; */
-} ATOM_VRAM_INFO_V3;
+//compare with V3, we flat the struct by merging ATOM_MEMORY_FORMAT (as is) into V4 as the same level
+  USHORT	usEMRS2Value;      		            // EMRS2 Value is used for GDDR2 and GDDR4 memory type
+  USHORT	usEMRS3Value;      		            // EMRS3 Value is used for GDDR2 and GDDR4 memory type
+  UCHAR   ucMemoryVenderID;  		            // Predefined, If not predefined, vendor detection table gets executed
+  UCHAR	  ucRefreshRateFactor;              // [1:0]=RefreshFactor (00=8ms, 01=16ms, 10=32ms,11=64ms)
+  UCHAR	  ucFIFODepth;			                // FIFO depth supposes to be detected during vendor detection, but if we dont do vendor detection we have to hardcode FIFO Depth
+  UCHAR   ucCDR_Bandwidth;		   // [0:3]=Read CDR bandwidth, [4:7] - Write CDR Bandwidth
+  ATOM_MEMORY_TIMING_FORMAT_V2  asMemTiming[5];//Memory Timing block sort from lower clock to higher clock
+}ATOM_VRAM_MODULE_V6;
+
+
+
+typedef struct _ATOM_VRAM_INFO_V2
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  UCHAR                      ucNumOfVRAMModule;
+  ATOM_VRAM_MODULE           aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];      // just for allocation, real number of blocks is in ucNumOfVRAMModule;
+}ATOM_VRAM_INFO_V2;
+
+typedef struct _ATOM_VRAM_INFO_V3
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+	USHORT										 usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
+	USHORT										 usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
+	USHORT										 usRerseved;
+	UCHAR           	         aVID_PinsShift[9];															 // 8 bit strap maximum+terminator
+  UCHAR                      ucNumOfVRAMModule;
+  ATOM_VRAM_MODULE		       aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];      // just for allocation, real number of blocks is in ucNumOfVRAMModule;
+	ATOM_INIT_REG_BLOCK				 asMemPatch;																		 // for allocation
+																																						 //	ATOM_INIT_REG_BLOCK				 aMemAdjust;
+}ATOM_VRAM_INFO_V3;
 
 #define	ATOM_VRAM_INFO_LAST	     ATOM_VRAM_INFO_V3
 
-typedef struct _ATOM_VRAM_INFO_V4 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usMemAdjustTblOffset;	/*  offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting */
-	USHORT usMemClkPatchTblOffset;	/*      offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting */
-	USHORT usRerseved;
-	UCHAR ucMemDQ7_0ByteRemap;	/*  DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3 */
-	ULONG ulMemDQ7_0BitRemap;	/*  each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21] */
-	UCHAR ucReservde[4];
-	UCHAR ucNumOfVRAMModule;
-	ATOM_VRAM_MODULE_V4 aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];	/*  just for allocation, real number of blocks is in ucNumOfVRAMModule; */
-	ATOM_INIT_REG_BLOCK asMemPatch;	/*  for allocation */
-	/*      ATOM_INIT_REG_BLOCK                              aMemAdjust; */
-} ATOM_VRAM_INFO_V4;
+typedef struct _ATOM_VRAM_INFO_V4
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+	USHORT										 usMemAdjustTblOffset;													 // offset of ATOM_INIT_REG_BLOCK structure for memory vendor specific MC adjust setting
+	USHORT										 usMemClkPatchTblOffset;												 //	offset of ATOM_INIT_REG_BLOCK structure for memory clock specific MC setting
+	USHORT										 usRerseved;
+	UCHAR           	         ucMemDQ7_0ByteRemap;													   // DQ line byte remap, =0: Memory Data line BYTE0, =1: BYTE1, =2: BYTE2, =3: BYTE3
+  ULONG                      ulMemDQ7_0BitRemap;                             // each DQ line ( 7~0) use 3bits, like: DQ0=Bit[2:0], DQ1:[5:3], ... DQ7:[23:21]
+  UCHAR                      ucReservde[4]; 
+  UCHAR                      ucNumOfVRAMModule;
+  ATOM_VRAM_MODULE_V4		     aVramInfo[ATOM_MAX_NUMBER_OF_VRAM_MODULE];      // just for allocation, real number of blocks is in ucNumOfVRAMModule;
+	ATOM_INIT_REG_BLOCK				 asMemPatch;																		 // for allocation
+																																						 //	ATOM_INIT_REG_BLOCK				 aMemAdjust;
+}ATOM_VRAM_INFO_V4;
 
-typedef struct _ATOM_VRAM_GPIO_DETECTION_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR aVID_PinsShift[9];	/* 8 bit strap maximum+terminator */
-} ATOM_VRAM_GPIO_DETECTION_INFO;
+typedef struct _ATOM_VRAM_GPIO_DETECTION_INFO
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;
+  UCHAR           	         aVID_PinsShift[9];   //8 bit strap maximum+terminator
+}ATOM_VRAM_GPIO_DETECTION_INFO;
 
-typedef struct _ATOM_MEMORY_TRAINING_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucTrainingLoop;
-	UCHAR ucReserved[3];
-	ATOM_INIT_REG_BLOCK asMemTrainingSetting;
-} ATOM_MEMORY_TRAINING_INFO;
 
-typedef struct SW_I2C_CNTL_DATA_PARAMETERS {
-	UCHAR ucControl;
-	UCHAR ucData;
-	UCHAR ucSatus;
-	UCHAR ucTemp;
+typedef struct _ATOM_MEMORY_TRAINING_INFO
+{
+	ATOM_COMMON_TABLE_HEADER   sHeader;
+	UCHAR											 ucTrainingLoop;
+	UCHAR											 ucReserved[3];
+	ATOM_INIT_REG_BLOCK				 asMemTrainingSetting;
+}ATOM_MEMORY_TRAINING_INFO;
+
+
+typedef struct SW_I2C_CNTL_DATA_PARAMETERS
+{
+  UCHAR    ucControl;
+  UCHAR    ucData; 
+  UCHAR    ucSatus; 
+  UCHAR    ucTemp; 
 } SW_I2C_CNTL_DATA_PARAMETERS;
 
 #define SW_I2C_CNTL_DATA_PS_ALLOCATION  SW_I2C_CNTL_DATA_PARAMETERS
 
-typedef struct _SW_I2C_IO_DATA_PARAMETERS {
-	USHORT GPIO_Info;
-	UCHAR ucAct;
-	UCHAR ucData;
-} SW_I2C_IO_DATA_PARAMETERS;
+typedef struct _SW_I2C_IO_DATA_PARAMETERS
+{                               
+  USHORT   GPIO_Info;
+  UCHAR    ucAct; 
+  UCHAR    ucData; 
+ } SW_I2C_IO_DATA_PARAMETERS;
 
 #define SW_I2C_IO_DATA_PS_ALLOCATION  SW_I2C_IO_DATA_PARAMETERS
 
@@ -4087,127 +4998,136 @@
 #define SW_I2C_CNTL_CLOSE     5
 #define SW_I2C_CNTL_WRITE1BIT 6
 
-/* ==============================VESA definition Portion=============================== */
+//==============================VESA definition Portion===============================
 #define VESA_OEM_PRODUCT_REV			            '01.00'
-#define VESA_MODE_ATTRIBUTE_MODE_SUPPORT	     0xBB	/* refer to VBE spec p.32, no TTY support */
+#define VESA_MODE_ATTRIBUTE_MODE_SUPPORT	     0xBB	//refer to VBE spec p.32, no TTY support
 #define VESA_MODE_WIN_ATTRIBUTE						     7
 #define VESA_WIN_SIZE											     64
 
-typedef struct _PTR_32_BIT_STRUCTURE {
-	USHORT Offset16;
-	USHORT Segment16;
+typedef struct _PTR_32_BIT_STRUCTURE
+{
+	USHORT	Offset16;			
+	USHORT	Segment16;				
 } PTR_32_BIT_STRUCTURE;
 
-typedef union _PTR_32_BIT_UNION {
-	PTR_32_BIT_STRUCTURE SegmentOffset;
-	ULONG Ptr32_Bit;
+typedef union _PTR_32_BIT_UNION
+{
+	PTR_32_BIT_STRUCTURE	SegmentOffset;
+	ULONG					        Ptr32_Bit;
 } PTR_32_BIT_UNION;
 
-typedef struct _VBE_1_2_INFO_BLOCK_UPDATABLE {
-	UCHAR VbeSignature[4];
-	USHORT VbeVersion;
-	PTR_32_BIT_UNION OemStringPtr;
-	UCHAR Capabilities[4];
-	PTR_32_BIT_UNION VideoModePtr;
-	USHORT TotalMemory;
+typedef struct _VBE_1_2_INFO_BLOCK_UPDATABLE
+{
+	UCHAR				      VbeSignature[4];
+	USHORT				    VbeVersion;
+	PTR_32_BIT_UNION	OemStringPtr;
+	UCHAR				      Capabilities[4];
+	PTR_32_BIT_UNION	VideoModePtr;
+	USHORT				    TotalMemory;
 } VBE_1_2_INFO_BLOCK_UPDATABLE;
 
-typedef struct _VBE_2_0_INFO_BLOCK_UPDATABLE {
-	VBE_1_2_INFO_BLOCK_UPDATABLE CommonBlock;
-	USHORT OemSoftRev;
-	PTR_32_BIT_UNION OemVendorNamePtr;
-	PTR_32_BIT_UNION OemProductNamePtr;
-	PTR_32_BIT_UNION OemProductRevPtr;
+
+typedef struct _VBE_2_0_INFO_BLOCK_UPDATABLE
+{
+	VBE_1_2_INFO_BLOCK_UPDATABLE	CommonBlock;
+	USHORT							    OemSoftRev;
+	PTR_32_BIT_UNION				OemVendorNamePtr;
+	PTR_32_BIT_UNION				OemProductNamePtr;
+	PTR_32_BIT_UNION				OemProductRevPtr;
 } VBE_2_0_INFO_BLOCK_UPDATABLE;
 
-typedef union _VBE_VERSION_UNION {
-	VBE_2_0_INFO_BLOCK_UPDATABLE VBE_2_0_InfoBlock;
-	VBE_1_2_INFO_BLOCK_UPDATABLE VBE_1_2_InfoBlock;
+typedef union _VBE_VERSION_UNION
+{
+	VBE_2_0_INFO_BLOCK_UPDATABLE	VBE_2_0_InfoBlock;
+	VBE_1_2_INFO_BLOCK_UPDATABLE	VBE_1_2_InfoBlock;
 } VBE_VERSION_UNION;
 
-typedef struct _VBE_INFO_BLOCK {
-	VBE_VERSION_UNION UpdatableVBE_Info;
-	UCHAR Reserved[222];
-	UCHAR OemData[256];
+typedef struct _VBE_INFO_BLOCK
+{
+	VBE_VERSION_UNION			UpdatableVBE_Info;
+	UCHAR						      Reserved[222];
+	UCHAR						      OemData[256];
 } VBE_INFO_BLOCK;
 
-typedef struct _VBE_FP_INFO {
-	USHORT HSize;
-	USHORT VSize;
-	USHORT FPType;
-	UCHAR RedBPP;
-	UCHAR GreenBPP;
-	UCHAR BlueBPP;
-	UCHAR ReservedBPP;
-	ULONG RsvdOffScrnMemSize;
-	ULONG RsvdOffScrnMEmPtr;
-	UCHAR Reserved[14];
+typedef struct _VBE_FP_INFO
+{
+  USHORT	HSize;
+	USHORT	VSize;
+	USHORT	FPType;
+	UCHAR		RedBPP;
+	UCHAR		GreenBPP;
+	UCHAR		BlueBPP;
+	UCHAR		ReservedBPP;
+	ULONG		RsvdOffScrnMemSize;
+	ULONG		RsvdOffScrnMEmPtr;
+	UCHAR		Reserved[14];
 } VBE_FP_INFO;
 
-typedef struct _VESA_MODE_INFO_BLOCK {
-/*  Mandatory information for all VBE revisions */
-	USHORT ModeAttributes;	/*                  dw      ?       ; mode attributes */
-	UCHAR WinAAttributes;	/*                    db      ?       ; window A attributes */
-	UCHAR WinBAttributes;	/*                    db      ?       ; window B attributes */
-	USHORT WinGranularity;	/*                    dw      ?       ; window granularity */
-	USHORT WinSize;		/*                    dw      ?       ; window size */
-	USHORT WinASegment;	/*                    dw      ?       ; window A start segment */
-	USHORT WinBSegment;	/*                    dw      ?       ; window B start segment */
-	ULONG WinFuncPtr;	/*                    dd      ?       ; real mode pointer to window function */
-	USHORT BytesPerScanLine;	/*                    dw      ?       ; bytes per scan line */
+typedef struct _VESA_MODE_INFO_BLOCK
+{
+// Mandatory information for all VBE revisions
+  USHORT    ModeAttributes;  //			dw	?	; mode attributes
+	UCHAR     WinAAttributes;  //			db	?	; window A attributes
+	UCHAR     WinBAttributes;  //			db	?	; window B attributes
+	USHORT    WinGranularity;  //			dw	?	; window granularity
+	USHORT    WinSize;         //			dw	?	; window size
+	USHORT    WinASegment;     //			dw	?	; window A start segment
+	USHORT    WinBSegment;     //			dw	?	; window B start segment
+	ULONG     WinFuncPtr;      //			dd	?	; real mode pointer to window function
+	USHORT    BytesPerScanLine;//			dw	?	; bytes per scan line
 
-/* ; Mandatory information for VBE 1.2 and above */
-	USHORT XResolution;	/*                         dw      ?       ; horizontal resolution in pixels or characters */
-	USHORT YResolution;	/*                   dw      ?       ; vertical resolution in pixels or characters */
-	UCHAR XCharSize;	/*                   db      ?       ; character cell width in pixels */
-	UCHAR YCharSize;	/*                   db      ?       ; character cell height in pixels */
-	UCHAR NumberOfPlanes;	/*                   db      ?       ; number of memory planes */
-	UCHAR BitsPerPixel;	/*                   db      ?       ; bits per pixel */
-	UCHAR NumberOfBanks;	/*                   db      ?       ; number of banks */
-	UCHAR MemoryModel;	/*                   db      ?       ; memory model type */
-	UCHAR BankSize;		/*                   db      ?       ; bank size in KB */
-	UCHAR NumberOfImagePages;	/*            db    ?       ; number of images */
-	UCHAR ReservedForPageFunction;	/* db  1       ; reserved for page function */
+//; Mandatory information for VBE 1.2 and above
+  USHORT    XResolution;      //			dw	?	; horizontal resolution in pixels or characters
+	USHORT    YResolution;      //			dw	?	; vertical resolution in pixels or characters
+	UCHAR     XCharSize;        //			db	?	; character cell width in pixels
+	UCHAR     YCharSize;        //			db	?	; character cell height in pixels
+	UCHAR     NumberOfPlanes;   //			db	?	; number of memory planes
+	UCHAR     BitsPerPixel;     //			db	?	; bits per pixel
+	UCHAR     NumberOfBanks;    //			db	?	; number of banks
+	UCHAR     MemoryModel;      //			db	?	; memory model type
+	UCHAR     BankSize;         //			db	?	; bank size in KB
+	UCHAR     NumberOfImagePages;//		  db	?	; number of images
+	UCHAR     ReservedForPageFunction;//db	1	; reserved for page function
 
-/* ; Direct Color fields(required for direct/6 and YUV/7 memory models) */
-	UCHAR RedMaskSize;	/*           db      ?       ; size of direct color red mask in bits */
-	UCHAR RedFieldPosition;	/*           db      ?       ; bit position of lsb of red mask */
-	UCHAR GreenMaskSize;	/*           db      ?       ; size of direct color green mask in bits */
-	UCHAR GreenFieldPosition;	/*           db      ?       ; bit position of lsb of green mask */
-	UCHAR BlueMaskSize;	/*           db      ?       ; size of direct color blue mask in bits */
-	UCHAR BlueFieldPosition;	/*           db      ?       ; bit position of lsb of blue mask */
-	UCHAR RsvdMaskSize;	/*           db      ?       ; size of direct color reserved mask in bits */
-	UCHAR RsvdFieldPosition;	/*           db      ?       ; bit position of lsb of reserved mask */
-	UCHAR DirectColorModeInfo;	/*           db      ?       ; direct color mode attributes */
+//; Direct Color fields(required for direct/6 and YUV/7 memory models)
+	UCHAR			RedMaskSize;        //		db	?	; size of direct color red mask in bits
+	UCHAR			RedFieldPosition;   //		db	?	; bit position of lsb of red mask
+	UCHAR			GreenMaskSize;      //		db	?	; size of direct color green mask in bits
+	UCHAR			GreenFieldPosition; //		db	?	; bit position of lsb of green mask
+	UCHAR			BlueMaskSize;       //		db	?	; size of direct color blue mask in bits
+	UCHAR			BlueFieldPosition;  //		db	?	; bit position of lsb of blue mask
+	UCHAR			RsvdMaskSize;       //		db	?	; size of direct color reserved mask in bits
+	UCHAR			RsvdFieldPosition;  //		db	?	; bit position of lsb of reserved mask
+	UCHAR			DirectColorModeInfo;//		db	?	; direct color mode attributes
 
-/* ; Mandatory information for VBE 2.0 and above */
-	ULONG PhysBasePtr;	/*           dd      ?       ; physical address for flat memory frame buffer */
-	ULONG Reserved_1;	/*           dd      0       ; reserved - always set to 0 */
-	USHORT Reserved_2;	/*     dw    0       ; reserved - always set to 0 */
+//; Mandatory information for VBE 2.0 and above
+	ULONG			PhysBasePtr;        //		dd	?	; physical address for flat memory frame buffer
+	ULONG			Reserved_1;         //		dd	0	; reserved - always set to 0
+	USHORT		Reserved_2;         //	  dw	0	; reserved - always set to 0
 
-/* ; Mandatory information for VBE 3.0 and above */
-	USHORT LinBytesPerScanLine;	/*         dw      ?       ; bytes per scan line for linear modes */
-	UCHAR BnkNumberOfImagePages;	/*         db      ?       ; number of images for banked modes */
-	UCHAR LinNumberOfImagPages;	/*         db      ?       ; number of images for linear modes */
-	UCHAR LinRedMaskSize;	/*         db      ?       ; size of direct color red mask(linear modes) */
-	UCHAR LinRedFieldPosition;	/*         db      ?       ; bit position of lsb of red mask(linear modes) */
-	UCHAR LinGreenMaskSize;	/*         db      ?       ; size of direct color green mask(linear modes) */
-	UCHAR LinGreenFieldPosition;	/*         db      ?       ; bit position of lsb of green mask(linear modes) */
-	UCHAR LinBlueMaskSize;	/*         db      ?       ; size of direct color blue mask(linear modes) */
-	UCHAR LinBlueFieldPosition;	/*         db      ?       ; bit position of lsb of blue mask(linear modes) */
-	UCHAR LinRsvdMaskSize;	/*         db      ?       ; size of direct color reserved mask(linear modes) */
-	UCHAR LinRsvdFieldPosition;	/*         db      ?       ; bit position of lsb of reserved mask(linear modes) */
-	ULONG MaxPixelClock;	/*         dd      ?       ; maximum pixel clock(in Hz) for graphics mode */
-	UCHAR Reserved;		/*         db      190 dup (0) */
+//; Mandatory information for VBE 3.0 and above
+	USHORT		LinBytesPerScanLine;  //	dw	?	; bytes per scan line for linear modes
+	UCHAR			BnkNumberOfImagePages;//	db	?	; number of images for banked modes
+	UCHAR			LinNumberOfImagPages; //	db	?	; number of images for linear modes
+	UCHAR			LinRedMaskSize;       //	db	?	; size of direct color red mask(linear modes)
+	UCHAR			LinRedFieldPosition;  //	db	?	; bit position of lsb of red mask(linear modes)
+	UCHAR			LinGreenMaskSize;     //	db	?	; size of direct color green mask(linear modes)
+	UCHAR			LinGreenFieldPosition;//	db	?	; bit position of lsb of green mask(linear modes)
+	UCHAR			LinBlueMaskSize;      //	db	?	; size of direct color blue mask(linear modes)
+	UCHAR			LinBlueFieldPosition; //	db	?	; bit position of lsb of blue mask(linear modes)
+	UCHAR			LinRsvdMaskSize;      //	db	?	; size of direct color reserved mask(linear modes)
+	UCHAR			LinRsvdFieldPosition; //	db	?	; bit position of lsb of reserved mask(linear modes)
+	ULONG			MaxPixelClock;        //	dd	?	; maximum pixel clock(in Hz) for graphics mode
+	UCHAR			Reserved;             //	db	190 dup (0)
 } VESA_MODE_INFO_BLOCK;
 
-/*  BIOS function CALLS */
-#define ATOM_BIOS_EXTENDED_FUNCTION_CODE        0xA0	/*  ATI Extended Function code */
+// BIOS function CALLS
+#define ATOM_BIOS_EXTENDED_FUNCTION_CODE        0xA0	        // ATI Extended Function code
 #define ATOM_BIOS_FUNCTION_COP_MODE             0x00
 #define ATOM_BIOS_FUNCTION_SHORT_QUERY1         0x04
 #define ATOM_BIOS_FUNCTION_SHORT_QUERY2         0x05
 #define ATOM_BIOS_FUNCTION_SHORT_QUERY3         0x06
-#define ATOM_BIOS_FUNCTION_GET_DDC              0x0B
+#define ATOM_BIOS_FUNCTION_GET_DDC              0x0B   
 #define ATOM_BIOS_FUNCTION_ASIC_DSTATE          0x0E
 #define ATOM_BIOS_FUNCTION_DEBUG_PLAY           0x0F
 #define ATOM_BIOS_FUNCTION_STV_STD              0x16
@@ -4217,100 +5137,135 @@
 #define ATOM_BIOS_FUNCTION_PANEL_CONTROL        0x82
 #define ATOM_BIOS_FUNCTION_OLD_DEVICE_DET       0x83
 #define ATOM_BIOS_FUNCTION_OLD_DEVICE_SWITCH    0x84
-#define ATOM_BIOS_FUNCTION_HW_ICON              0x8A
+#define ATOM_BIOS_FUNCTION_HW_ICON              0x8A 
 #define ATOM_BIOS_FUNCTION_SET_CMOS             0x8B
-#define SUB_FUNCTION_UPDATE_DISPLAY_INFO        0x8000	/*  Sub function 80 */
-#define SUB_FUNCTION_UPDATE_EXPANSION_INFO      0x8100	/*  Sub function 80 */
+#define SUB_FUNCTION_UPDATE_DISPLAY_INFO        0x8000          // Sub function 80
+#define SUB_FUNCTION_UPDATE_EXPANSION_INFO      0x8100          // Sub function 80
 
 #define ATOM_BIOS_FUNCTION_DISPLAY_INFO         0x8D
 #define ATOM_BIOS_FUNCTION_DEVICE_ON_OFF        0x8E
-#define ATOM_BIOS_FUNCTION_VIDEO_STATE          0x8F
-#define ATOM_SUB_FUNCTION_GET_CRITICAL_STATE    0x0300	/*  Sub function 03 */
-#define ATOM_SUB_FUNCTION_GET_LIDSTATE          0x0700	/*  Sub function 7 */
-#define ATOM_SUB_FUNCTION_THERMAL_STATE_NOTICE  0x1400	/*  Notify caller the current thermal state */
-#define ATOM_SUB_FUNCTION_CRITICAL_STATE_NOTICE 0x8300	/*  Notify caller the current critical state */
-#define ATOM_SUB_FUNCTION_SET_LIDSTATE          0x8500	/*  Sub function 85 */
-#define ATOM_SUB_FUNCTION_GET_REQ_DISPLAY_FROM_SBIOS_MODE 0x8900	/*  Sub function 89 */
-#define ATOM_SUB_FUNCTION_INFORM_ADC_SUPPORT    0x9400	/*  Notify caller that ADC is supported */
+#define ATOM_BIOS_FUNCTION_VIDEO_STATE          0x8F 
+#define ATOM_SUB_FUNCTION_GET_CRITICAL_STATE    0x0300          // Sub function 03  
+#define ATOM_SUB_FUNCTION_GET_LIDSTATE          0x0700          // Sub function 7
+#define ATOM_SUB_FUNCTION_THERMAL_STATE_NOTICE  0x1400          // Notify caller the current thermal state
+#define ATOM_SUB_FUNCTION_CRITICAL_STATE_NOTICE 0x8300          // Notify caller the current critical state
+#define ATOM_SUB_FUNCTION_SET_LIDSTATE          0x8500          // Sub function 85
+#define ATOM_SUB_FUNCTION_GET_REQ_DISPLAY_FROM_SBIOS_MODE 0x8900// Sub function 89
+#define ATOM_SUB_FUNCTION_INFORM_ADC_SUPPORT    0x9400          // Notify caller that ADC is supported
+     
 
-#define ATOM_BIOS_FUNCTION_VESA_DPMS            0x4F10	/*  Set DPMS */
-#define ATOM_SUB_FUNCTION_SET_DPMS              0x0001	/*  BL: Sub function 01 */
-#define ATOM_SUB_FUNCTION_GET_DPMS              0x0002	/*  BL: Sub function 02 */
-#define ATOM_PARAMETER_VESA_DPMS_ON             0x0000	/*  BH Parameter for DPMS ON. */
-#define ATOM_PARAMETER_VESA_DPMS_STANDBY        0x0100	/*  BH Parameter for DPMS STANDBY */
-#define ATOM_PARAMETER_VESA_DPMS_SUSPEND        0x0200	/*  BH Parameter for DPMS SUSPEND */
-#define ATOM_PARAMETER_VESA_DPMS_OFF            0x0400	/*  BH Parameter for DPMS OFF */
-#define ATOM_PARAMETER_VESA_DPMS_REDUCE_ON      0x0800	/*  BH Parameter for DPMS REDUCE ON (NOT SUPPORTED) */
+#define ATOM_BIOS_FUNCTION_VESA_DPMS            0x4F10          // Set DPMS 
+#define ATOM_SUB_FUNCTION_SET_DPMS              0x0001          // BL: Sub function 01 
+#define ATOM_SUB_FUNCTION_GET_DPMS              0x0002          // BL: Sub function 02 
+#define ATOM_PARAMETER_VESA_DPMS_ON             0x0000          // BH Parameter for DPMS ON.  
+#define ATOM_PARAMETER_VESA_DPMS_STANDBY        0x0100          // BH Parameter for DPMS STANDBY  
+#define ATOM_PARAMETER_VESA_DPMS_SUSPEND        0x0200          // BH Parameter for DPMS SUSPEND
+#define ATOM_PARAMETER_VESA_DPMS_OFF            0x0400          // BH Parameter for DPMS OFF
+#define ATOM_PARAMETER_VESA_DPMS_REDUCE_ON      0x0800          // BH Parameter for DPMS REDUCE ON (NOT SUPPORTED)
 
 #define ATOM_BIOS_RETURN_CODE_MASK              0x0000FF00L
 #define ATOM_BIOS_REG_HIGH_MASK                 0x0000FF00L
 #define ATOM_BIOS_REG_LOW_MASK                  0x000000FFL
 
-/*  structure used for VBIOS only */
+// structure used for VBIOS only
 
-/* DispOutInfoTable */
-typedef struct _ASIC_TRANSMITTER_INFO {
+//DispOutInfoTable
+typedef struct _ASIC_TRANSMITTER_INFO
+{
 	USHORT usTransmitterObjId;
 	USHORT usSupportDevice;
-	UCHAR ucTransmitterCmdTblId;
-	UCHAR ucConfig;
-	UCHAR ucEncoderID;	/* available 1st encoder ( default ) */
-	UCHAR ucOptionEncoderID;	/* available 2nd encoder ( optional ) */
-	UCHAR uc2ndEncoderID;
-	UCHAR ucReserved;
-} ASIC_TRANSMITTER_INFO;
+  UCHAR  ucTransmitterCmdTblId;
+	UCHAR  ucConfig;
+	UCHAR  ucEncoderID;					 //available 1st encoder ( default )
+	UCHAR  ucOptionEncoderID;    //available 2nd encoder ( optional )
+	UCHAR  uc2ndEncoderID;
+	UCHAR  ucReserved;
+}ASIC_TRANSMITTER_INFO;
 
-typedef struct _ASIC_ENCODER_INFO {
+typedef struct _ASIC_ENCODER_INFO
+{
 	UCHAR ucEncoderID;
 	UCHAR ucEncoderConfig;
-	USHORT usEncoderCmdTblId;
-} ASIC_ENCODER_INFO;
+  USHORT usEncoderCmdTblId;
+}ASIC_ENCODER_INFO;
 
-typedef struct _ATOM_DISP_OUT_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
+typedef struct _ATOM_DISP_OUT_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
 	USHORT ptrTransmitterInfo;
 	USHORT ptrEncoderInfo;
-	ASIC_TRANSMITTER_INFO asTransmitterInfo[1];
-	ASIC_ENCODER_INFO asEncoderInfo[1];
-} ATOM_DISP_OUT_INFO;
+	ASIC_TRANSMITTER_INFO  asTransmitterInfo[1];
+	ASIC_ENCODER_INFO      asEncoderInfo[1];
+}ATOM_DISP_OUT_INFO;
 
-/*  DispDevicePriorityInfo */
-typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
+typedef struct _ATOM_DISP_OUT_INFO_V2
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+	USHORT ptrTransmitterInfo;
+	USHORT ptrEncoderInfo;
+  USHORT ptrMainCallParserFar;                  // direct address of main parser call in VBIOS binary. 
+	ASIC_TRANSMITTER_INFO  asTransmitterInfo[1];
+	ASIC_ENCODER_INFO      asEncoderInfo[1];
+}ATOM_DISP_OUT_INFO_V2;
+
+// DispDevicePriorityInfo
+typedef struct _ATOM_DISPLAY_DEVICE_PRIORITY_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
 	USHORT asDevicePriority[16];
-} ATOM_DISPLAY_DEVICE_PRIORITY_INFO;
+}ATOM_DISPLAY_DEVICE_PRIORITY_INFO;
 
-/* ProcessAuxChannelTransactionTable */
-typedef struct _PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS {
-	USHORT lpAuxRequest;
-	USHORT lpDataOut;
-	UCHAR ucChannelID;
-	union {
-		UCHAR ucReplyStatus;
-		UCHAR ucDelay;
+//ProcessAuxChannelTransactionTable
+typedef struct _PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS
+{
+	USHORT	lpAuxRequest;
+	USHORT  lpDataOut;
+	UCHAR		ucChannelID;
+	union
+	{
+  UCHAR   ucReplyStatus;
+	UCHAR   ucDelay;
 	};
-	UCHAR ucDataOutLen;
-	UCHAR ucReserved;
-} PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS;
+  UCHAR   ucDataOutLen;
+	UCHAR   ucReserved;
+}PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS;
+
+//ProcessAuxChannelTransactionTable
+typedef struct _PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2
+{
+	USHORT	lpAuxRequest;
+	USHORT  lpDataOut;
+	UCHAR		ucChannelID;
+	union
+	{
+  UCHAR   ucReplyStatus;
+	UCHAR   ucDelay;
+	};
+  UCHAR   ucDataOutLen;
+	UCHAR   ucHPD_ID;                                       //=0: HPD1, =1: HPD2, =2: HPD3, =3: HPD4, =4: HPD5, =5: HPD6
+}PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2;
 
 #define PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION			PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS
 
-/* GetSinkType */
+//GetSinkType
 
-typedef struct _DP_ENCODER_SERVICE_PARAMETERS {
+typedef struct _DP_ENCODER_SERVICE_PARAMETERS
+{
 	USHORT ucLinkClock;
-	union {
-		UCHAR ucConfig;	/*  for DP training command */
-		UCHAR ucI2cId;	/*  use for GET_SINK_TYPE command */
+	union 
+	{
+	UCHAR ucConfig;				// for DP training command
+	UCHAR ucI2cId;				// use for GET_SINK_TYPE command
 	};
 	UCHAR ucAction;
 	UCHAR ucStatus;
 	UCHAR ucLaneNum;
 	UCHAR ucReserved[2];
-} DP_ENCODER_SERVICE_PARAMETERS;
+}DP_ENCODER_SERVICE_PARAMETERS;
 
-/*  ucAction */
+// ucAction
 #define ATOM_DP_ACTION_GET_SINK_TYPE							0x01
+/* obselete */
 #define ATOM_DP_ACTION_TRAINING_START							0x02
 #define ATOM_DP_ACTION_TRAINING_COMPLETE					0x03
 #define ATOM_DP_ACTION_TRAINING_PATTERN_SEL				0x04
@@ -4318,7 +5273,7 @@
 #define ATOM_DP_ACTION_GET_VSWING_PREEMP					0x06
 #define ATOM_DP_ACTION_BLANKING                   0x07
 
-/*  ucConfig */
+// ucConfig
 #define ATOM_DP_CONFIG_ENCODER_SEL_MASK						0x03
 #define ATOM_DP_CONFIG_DIG1_ENCODER								0x00
 #define ATOM_DP_CONFIG_DIG2_ENCODER								0x01
@@ -4326,14 +5281,14 @@
 #define ATOM_DP_CONFIG_LINK_SEL_MASK							0x04
 #define ATOM_DP_CONFIG_LINK_A											0x00
 #define ATOM_DP_CONFIG_LINK_B											0x04
-
+/* /obselete */
 #define DP_ENCODER_SERVICE_PS_ALLOCATION				WRITE_ONE_BYTE_HW_I2C_DATA_PARAMETERS
 
-/*  DP_TRAINING_TABLE */
-#define DPCD_SET_LINKRATE_LANENUM_PATTERN1_TBL_ADDR				ATOM_DP_TRAINING_TBL_ADDR
+// DP_TRAINING_TABLE
+#define DPCD_SET_LINKRATE_LANENUM_PATTERN1_TBL_ADDR				ATOM_DP_TRAINING_TBL_ADDR		
 #define DPCD_SET_SS_CNTL_TBL_ADDR													(ATOM_DP_TRAINING_TBL_ADDR + 8 )
-#define DPCD_SET_LANE_VSWING_PREEMP_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 16)
-#define DPCD_SET_TRAINING_PATTERN0_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 24)
+#define DPCD_SET_LANE_VSWING_PREEMP_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 16 )
+#define DPCD_SET_TRAINING_PATTERN0_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 24 )
 #define DPCD_SET_TRAINING_PATTERN2_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 32)
 #define DPCD_GET_LINKRATE_LANENUM_SS_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 40)
 #define	DPCD_GET_LANE_STATUS_ADJUST_TBL_ADDR							(ATOM_DP_TRAINING_TBL_ADDR + 48)
@@ -4341,183 +5296,241 @@
 #define DP_I2C_AUX_DDC_WRITE_TBL_ADDR											(ATOM_DP_TRAINING_TBL_ADDR + 64)
 #define DP_I2C_AUX_DDC_READ_START_TBL_ADDR								(ATOM_DP_TRAINING_TBL_ADDR + 72)
 #define DP_I2C_AUX_DDC_READ_TBL_ADDR											(ATOM_DP_TRAINING_TBL_ADDR + 76)
-#define DP_I2C_AUX_DDC_READ_END_TBL_ADDR									(ATOM_DP_TRAINING_TBL_ADDR + 80)
+#define DP_I2C_AUX_DDC_WRITE_END_TBL_ADDR                 (ATOM_DP_TRAINING_TBL_ADDR + 80) 
+#define DP_I2C_AUX_DDC_READ_END_TBL_ADDR									(ATOM_DP_TRAINING_TBL_ADDR + 84)
 
-typedef struct _PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS {
-	UCHAR ucI2CSpeed;
-	union {
-		UCHAR ucRegIndex;
-		UCHAR ucStatus;
+typedef struct _PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS
+{
+	UCHAR   ucI2CSpeed;
+ 	union
+	{
+   UCHAR ucRegIndex;
+   UCHAR ucStatus;
 	};
-	USHORT lpI2CDataOut;
-	UCHAR ucFlag;
-	UCHAR ucTransBytes;
-	UCHAR ucSlaveAddr;
-	UCHAR ucLineNumber;
-} PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS;
+	USHORT  lpI2CDataOut;
+  UCHAR   ucFlag;               
+  UCHAR   ucTransBytes;
+  UCHAR   ucSlaveAddr;
+  UCHAR   ucLineNumber;
+}PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS;
 
 #define PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION       PROCESS_I2C_CHANNEL_TRANSACTION_PARAMETERS
 
-/* ucFlag */
+//ucFlag
 #define HW_I2C_WRITE        1
 #define HW_I2C_READ         0
+#define I2C_2BYTE_ADDR      0x02
 
-/****************************************************************************/
-/* Portion VI: Definitinos being oboselete */
+typedef struct _SET_HWBLOCK_INSTANCE_PARAMETER_V2
+{
+   UCHAR ucHWBlkInst;                // HW block instance, 0, 1, 2, ...
+   UCHAR ucReserved[3]; 
+}SET_HWBLOCK_INSTANCE_PARAMETER_V2;
+
+#define HWBLKINST_INSTANCE_MASK       0x07
+#define HWBLKINST_HWBLK_MASK          0xF0
+#define HWBLKINST_HWBLK_SHIFT         0x04
+
+//ucHWBlock
+#define SELECT_DISP_ENGINE            0
+#define SELECT_DISP_PLL               1
+#define SELECT_DCIO_UNIPHY_LINK0      2
+#define SELECT_DCIO_UNIPHY_LINK1      3
+#define SELECT_DCIO_IMPCAL            4
+#define SELECT_DCIO_DIG               6
+#define SELECT_CRTC_PIXEL_RATE        7
+
+/****************************************************************************/	
+//Portion VI: Definitinos for vbios MC scratch registers that driver used
 /****************************************************************************/
 
-/* ========================================================================================== */
-/* Remove the definitions below when driver is ready! */
-typedef struct _ATOM_DAC_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usMaxFrequency;	/*  in 10kHz unit */
-	USHORT usReserved;
-} ATOM_DAC_INFO;
+#define MC_MISC0__MEMORY_TYPE_MASK    0xF0000000
+#define MC_MISC0__MEMORY_TYPE__GDDR1  0x10000000
+#define MC_MISC0__MEMORY_TYPE__DDR2   0x20000000
+#define MC_MISC0__MEMORY_TYPE__GDDR3  0x30000000
+#define MC_MISC0__MEMORY_TYPE__GDDR4  0x40000000
+#define MC_MISC0__MEMORY_TYPE__GDDR5  0x50000000
+#define MC_MISC0__MEMORY_TYPE__DDR3   0xB0000000
 
-typedef struct _COMPASSIONATE_DATA {
-	ATOM_COMMON_TABLE_HEADER sHeader;
+/****************************************************************************/	
+//Portion VI: Definitinos being oboselete
+/****************************************************************************/
 
-	/* ==============================  DAC1 portion */
-	UCHAR ucDAC1_BG_Adjustment;
-	UCHAR ucDAC1_DAC_Adjustment;
-	USHORT usDAC1_FORCE_Data;
-	/* ==============================  DAC2 portion */
-	UCHAR ucDAC2_CRT2_BG_Adjustment;
-	UCHAR ucDAC2_CRT2_DAC_Adjustment;
-	USHORT usDAC2_CRT2_FORCE_Data;
-	USHORT usDAC2_CRT2_MUX_RegisterIndex;
-	UCHAR ucDAC2_CRT2_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
-	UCHAR ucDAC2_NTSC_BG_Adjustment;
-	UCHAR ucDAC2_NTSC_DAC_Adjustment;
-	USHORT usDAC2_TV1_FORCE_Data;
-	USHORT usDAC2_TV1_MUX_RegisterIndex;
-	UCHAR ucDAC2_TV1_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
-	UCHAR ucDAC2_CV_BG_Adjustment;
-	UCHAR ucDAC2_CV_DAC_Adjustment;
-	USHORT usDAC2_CV_FORCE_Data;
-	USHORT usDAC2_CV_MUX_RegisterIndex;
-	UCHAR ucDAC2_CV_MUX_RegisterInfo;	/* Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low */
-	UCHAR ucDAC2_PAL_BG_Adjustment;
-	UCHAR ucDAC2_PAL_DAC_Adjustment;
-	USHORT usDAC2_TV2_FORCE_Data;
-} COMPASSIONATE_DATA;
+//==========================================================================================
+//Remove the definitions below when driver is ready!
+typedef struct _ATOM_DAC_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  USHORT                   usMaxFrequency;      // in 10kHz unit
+  USHORT                   usReserved;
+}ATOM_DAC_INFO;
+
+
+typedef struct  _COMPASSIONATE_DATA           
+{
+  ATOM_COMMON_TABLE_HEADER sHeader; 
+
+  //==============================  DAC1 portion
+  UCHAR   ucDAC1_BG_Adjustment;
+  UCHAR   ucDAC1_DAC_Adjustment;
+  USHORT  usDAC1_FORCE_Data;
+  //==============================  DAC2 portion
+  UCHAR   ucDAC2_CRT2_BG_Adjustment;
+  UCHAR   ucDAC2_CRT2_DAC_Adjustment;
+  USHORT  usDAC2_CRT2_FORCE_Data;
+  USHORT  usDAC2_CRT2_MUX_RegisterIndex;
+  UCHAR   ucDAC2_CRT2_MUX_RegisterInfo;     //Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low
+  UCHAR   ucDAC2_NTSC_BG_Adjustment;
+  UCHAR   ucDAC2_NTSC_DAC_Adjustment;
+  USHORT  usDAC2_TV1_FORCE_Data;
+  USHORT  usDAC2_TV1_MUX_RegisterIndex;
+  UCHAR   ucDAC2_TV1_MUX_RegisterInfo;      //Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low
+  UCHAR   ucDAC2_CV_BG_Adjustment;
+  UCHAR   ucDAC2_CV_DAC_Adjustment;
+  USHORT  usDAC2_CV_FORCE_Data;
+  USHORT  usDAC2_CV_MUX_RegisterIndex;
+  UCHAR   ucDAC2_CV_MUX_RegisterInfo;       //Bit[4:0]=Bit position,Bit[7]=1:Active High;=0 Active Low
+  UCHAR   ucDAC2_PAL_BG_Adjustment;
+  UCHAR   ucDAC2_PAL_DAC_Adjustment;
+  USHORT  usDAC2_TV2_FORCE_Data;
+}COMPASSIONATE_DATA;
 
 /****************************Supported Device Info Table Definitions**********************/
-/*   ucConnectInfo: */
-/*     [7:4] - connector type */
-/*       = 1   - VGA connector */
-/*       = 2   - DVI-I */
-/*       = 3   - DVI-D */
-/*       = 4   - DVI-A */
-/*       = 5   - SVIDEO */
-/*       = 6   - COMPOSITE */
-/*       = 7   - LVDS */
-/*       = 8   - DIGITAL LINK */
-/*       = 9   - SCART */
-/*       = 0xA - HDMI_type A */
-/*       = 0xB - HDMI_type B */
-/*       = 0xE - Special case1 (DVI+DIN) */
-/*       Others=TBD */
-/*     [3:0] - DAC Associated */
-/*       = 0   - no DAC */
-/*       = 1   - DACA */
-/*       = 2   - DACB */
-/*       = 3   - External DAC */
-/*       Others=TBD */
-/*  */
+//  ucConnectInfo:
+//    [7:4] - connector type
+//      = 1   - VGA connector   
+//      = 2   - DVI-I
+//      = 3   - DVI-D
+//      = 4   - DVI-A
+//      = 5   - SVIDEO
+//      = 6   - COMPOSITE
+//      = 7   - LVDS
+//      = 8   - DIGITAL LINK
+//      = 9   - SCART
+//      = 0xA - HDMI_type A
+//      = 0xB - HDMI_type B
+//      = 0xE - Special case1 (DVI+DIN)
+//      Others=TBD
+//    [3:0] - DAC Associated
+//      = 0   - no DAC
+//      = 1   - DACA
+//      = 2   - DACB
+//      = 3   - External DAC
+//      Others=TBD
+//    
 
-typedef struct _ATOM_CONNECTOR_INFO {
+typedef struct _ATOM_CONNECTOR_INFO
+{
 #if ATOM_BIG_ENDIAN
-	UCHAR bfConnectorType:4;
-	UCHAR bfAssociatedDAC:4;
+  UCHAR   bfConnectorType:4;
+  UCHAR   bfAssociatedDAC:4;
 #else
-	UCHAR bfAssociatedDAC:4;
-	UCHAR bfConnectorType:4;
+  UCHAR   bfAssociatedDAC:4;
+  UCHAR   bfConnectorType:4;
 #endif
-} ATOM_CONNECTOR_INFO;
+}ATOM_CONNECTOR_INFO;
 
-typedef union _ATOM_CONNECTOR_INFO_ACCESS {
-	ATOM_CONNECTOR_INFO sbfAccess;
-	UCHAR ucAccess;
-} ATOM_CONNECTOR_INFO_ACCESS;
+typedef union _ATOM_CONNECTOR_INFO_ACCESS
+{
+  ATOM_CONNECTOR_INFO sbfAccess;
+  UCHAR               ucAccess;
+}ATOM_CONNECTOR_INFO_ACCESS;
 
-typedef struct _ATOM_CONNECTOR_INFO_I2C {
-	ATOM_CONNECTOR_INFO_ACCESS sucConnectorInfo;
-	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;
-} ATOM_CONNECTOR_INFO_I2C;
+typedef struct _ATOM_CONNECTOR_INFO_I2C
+{
+  ATOM_CONNECTOR_INFO_ACCESS sucConnectorInfo;
+  ATOM_I2C_ID_CONFIG_ACCESS  sucI2cId;
+}ATOM_CONNECTOR_INFO_I2C;
 
-typedef struct _ATOM_SUPPORTED_DEVICES_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usDeviceSupport;
-	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO];
-} ATOM_SUPPORTED_DEVICES_INFO;
+
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO
+{ 
+  ATOM_COMMON_TABLE_HEADER	sHeader;
+  USHORT                    usDeviceSupport;
+  ATOM_CONNECTOR_INFO_I2C   asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO];
+}ATOM_SUPPORTED_DEVICES_INFO;
 
 #define NO_INT_SRC_MAPPED       0xFF
 
-typedef struct _ATOM_CONNECTOR_INC_SRC_BITMAP {
-	UCHAR ucIntSrcBitmap;
-} ATOM_CONNECTOR_INC_SRC_BITMAP;
+typedef struct _ATOM_CONNECTOR_INC_SRC_BITMAP
+{
+  UCHAR   ucIntSrcBitmap;
+}ATOM_CONNECTOR_INC_SRC_BITMAP;
 
-typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usDeviceSupport;
-	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
-	ATOM_CONNECTOR_INC_SRC_BITMAP
-	    asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
-} ATOM_SUPPORTED_DEVICES_INFO_2;
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2
+{ 
+  ATOM_COMMON_TABLE_HEADER      sHeader;
+  USHORT                        usDeviceSupport;
+  ATOM_CONNECTOR_INFO_I2C       asConnInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
+  ATOM_CONNECTOR_INC_SRC_BITMAP asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE_INFO_2];
+}ATOM_SUPPORTED_DEVICES_INFO_2;
 
-typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usDeviceSupport;
-	ATOM_CONNECTOR_INFO_I2C asConnInfo[ATOM_MAX_SUPPORTED_DEVICE];
-	ATOM_CONNECTOR_INC_SRC_BITMAP asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE];
-} ATOM_SUPPORTED_DEVICES_INFO_2d1;
+typedef struct _ATOM_SUPPORTED_DEVICES_INFO_2d1
+{ 
+  ATOM_COMMON_TABLE_HEADER      sHeader;
+  USHORT                        usDeviceSupport;
+  ATOM_CONNECTOR_INFO_I2C       asConnInfo[ATOM_MAX_SUPPORTED_DEVICE];
+  ATOM_CONNECTOR_INC_SRC_BITMAP asIntSrcInfo[ATOM_MAX_SUPPORTED_DEVICE];
+}ATOM_SUPPORTED_DEVICES_INFO_2d1;
 
 #define ATOM_SUPPORTED_DEVICES_INFO_LAST ATOM_SUPPORTED_DEVICES_INFO_2d1
 
-typedef struct _ATOM_MISC_CONTROL_INFO {
-	USHORT usFrequency;
-	UCHAR ucPLL_ChargePump;	/*  PLL charge-pump gain control */
-	UCHAR ucPLL_DutyCycle;	/*  PLL duty cycle control */
-	UCHAR ucPLL_VCO_Gain;	/*  PLL VCO gain control */
-	UCHAR ucPLL_VoltageSwing;	/*  PLL driver voltage swing control */
-} ATOM_MISC_CONTROL_INFO;
+
+
+typedef struct _ATOM_MISC_CONTROL_INFO
+{
+   USHORT usFrequency;
+   UCHAR  ucPLL_ChargePump;				                // PLL charge-pump gain control
+   UCHAR  ucPLL_DutyCycle;				                // PLL duty cycle control
+   UCHAR  ucPLL_VCO_Gain;				                  // PLL VCO gain control
+   UCHAR  ucPLL_VoltageSwing;			                // PLL driver voltage swing control
+}ATOM_MISC_CONTROL_INFO;  
+
 
 #define ATOM_MAX_MISC_INFO       4
 
-typedef struct _ATOM_TMDS_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usMaxFrequency;	/*  in 10Khz */
-	ATOM_MISC_CONTROL_INFO asMiscInfo[ATOM_MAX_MISC_INFO];
-} ATOM_TMDS_INFO;
+typedef struct _ATOM_TMDS_INFO
+{
+  ATOM_COMMON_TABLE_HEADER sHeader;  
+  USHORT							usMaxFrequency;             // in 10Khz
+  ATOM_MISC_CONTROL_INFO				asMiscInfo[ATOM_MAX_MISC_INFO];
+}ATOM_TMDS_INFO;
 
-typedef struct _ATOM_ENCODER_ANALOG_ATTRIBUTE {
-	UCHAR ucTVStandard;	/* Same as TV standards defined above, */
-	UCHAR ucPadding[1];
-} ATOM_ENCODER_ANALOG_ATTRIBUTE;
 
-typedef struct _ATOM_ENCODER_DIGITAL_ATTRIBUTE {
-	UCHAR ucAttribute;	/* Same as other digital encoder attributes defined above */
-	UCHAR ucPadding[1];
-} ATOM_ENCODER_DIGITAL_ATTRIBUTE;
+typedef struct _ATOM_ENCODER_ANALOG_ATTRIBUTE
+{
+  UCHAR ucTVStandard;     //Same as TV standards defined above, 
+  UCHAR ucPadding[1];
+}ATOM_ENCODER_ANALOG_ATTRIBUTE;
 
-typedef union _ATOM_ENCODER_ATTRIBUTE {
-	ATOM_ENCODER_ANALOG_ATTRIBUTE sAlgAttrib;
-	ATOM_ENCODER_DIGITAL_ATTRIBUTE sDigAttrib;
-} ATOM_ENCODER_ATTRIBUTE;
+typedef struct _ATOM_ENCODER_DIGITAL_ATTRIBUTE
+{
+  UCHAR ucAttribute;      //Same as other digital encoder attributes defined above
+  UCHAR ucPadding[1];		
+}ATOM_ENCODER_DIGITAL_ATTRIBUTE;
 
-typedef struct _DVO_ENCODER_CONTROL_PARAMETERS {
-	USHORT usPixelClock;
-	USHORT usEncoderID;
-	UCHAR ucDeviceType;	/* Use ATOM_DEVICE_xxx1_Index to indicate device type only. */
-	UCHAR ucAction;		/* ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT */
-	ATOM_ENCODER_ATTRIBUTE usDevAttr;
-} DVO_ENCODER_CONTROL_PARAMETERS;
+typedef union _ATOM_ENCODER_ATTRIBUTE
+{
+  ATOM_ENCODER_ANALOG_ATTRIBUTE sAlgAttrib;
+  ATOM_ENCODER_DIGITAL_ATTRIBUTE sDigAttrib;
+}ATOM_ENCODER_ATTRIBUTE;
 
-typedef struct _DVO_ENCODER_CONTROL_PS_ALLOCATION {
-	DVO_ENCODER_CONTROL_PARAMETERS sDVOEncoder;
-	WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION sReserved;	/* Caller doesn't need to init this portion */
-} DVO_ENCODER_CONTROL_PS_ALLOCATION;
+
+typedef struct _DVO_ENCODER_CONTROL_PARAMETERS
+{
+  USHORT usPixelClock; 
+  USHORT usEncoderID; 
+  UCHAR  ucDeviceType;												//Use ATOM_DEVICE_xxx1_Index to indicate device type only.	
+  UCHAR  ucAction;														//ATOM_ENABLE/ATOM_DISABLE/ATOM_HPD_INIT
+  ATOM_ENCODER_ATTRIBUTE usDevAttr;     		
+}DVO_ENCODER_CONTROL_PARAMETERS;
+
+typedef struct _DVO_ENCODER_CONTROL_PS_ALLOCATION
+{                               
+  DVO_ENCODER_CONTROL_PARAMETERS    sDVOEncoder;
+  WRITE_ONE_BYTE_HW_I2C_DATA_PS_ALLOCATION      sReserved;     //Caller doesn't need to init this portion
+}DVO_ENCODER_CONTROL_PS_ALLOCATION;
+
 
 #define ATOM_XTMDS_ASIC_SI164_ID        1
 #define ATOM_XTMDS_ASIC_SI178_ID        2
@@ -4526,27 +5539,30 @@
 #define ATOM_XTMDS_SUPPORTED_DUALLINK   0x00000002
 #define ATOM_XTMDS_MVPU_FPGA            0x00000004
 
-typedef struct _ATOM_XTMDS_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	USHORT usSingleLinkMaxFrequency;
-	ATOM_I2C_ID_CONFIG_ACCESS sucI2cId;	/* Point the ID on which I2C is used to control external chip */
-	UCHAR ucXtransimitterID;
-	UCHAR ucSupportedLink;	/*  Bit field, bit0=1, single link supported;bit1=1,dual link supported */
-	UCHAR ucSequnceAlterID;	/*  Even with the same external TMDS asic, it's possible that the program seqence alters */
-	/*  due to design. This ID is used to alert driver that the sequence is not "standard"! */
-	UCHAR ucMasterAddress;	/*  Address to control Master xTMDS Chip */
-	UCHAR ucSlaveAddress;	/*  Address to control Slave xTMDS Chip */
-} ATOM_XTMDS_INFO;
+                           
+typedef struct _ATOM_XTMDS_INFO
+{
+  ATOM_COMMON_TABLE_HEADER   sHeader;  
+  USHORT                     usSingleLinkMaxFrequency; 
+  ATOM_I2C_ID_CONFIG_ACCESS  sucI2cId;           //Point the ID on which I2C is used to control external chip
+  UCHAR                      ucXtransimitterID;          
+  UCHAR                      ucSupportedLink;    // Bit field, bit0=1, single link supported;bit1=1,dual link supported
+  UCHAR                      ucSequnceAlterID;   // Even with the same external TMDS asic, it's possible that the program seqence alters 
+                                                 // due to design. This ID is used to alert driver that the sequence is not "standard"!              
+  UCHAR                      ucMasterAddress;    // Address to control Master xTMDS Chip
+  UCHAR                      ucSlaveAddress;     // Address to control Slave xTMDS Chip
+}ATOM_XTMDS_INFO;
 
-typedef struct _DFP_DPMS_STATUS_CHANGE_PARAMETERS {
-	UCHAR ucEnable;		/*  ATOM_ENABLE=On or ATOM_DISABLE=Off */
-	UCHAR ucDevice;		/*  ATOM_DEVICE_DFP1_INDEX.... */
-	UCHAR ucPadding[2];
-} DFP_DPMS_STATUS_CHANGE_PARAMETERS;
+typedef struct _DFP_DPMS_STATUS_CHANGE_PARAMETERS
+{  
+  UCHAR ucEnable;                     // ATOM_ENABLE=On or ATOM_DISABLE=Off
+  UCHAR ucDevice;                     // ATOM_DEVICE_DFP1_INDEX....
+  UCHAR ucPadding[2];             
+}DFP_DPMS_STATUS_CHANGE_PARAMETERS;
 
 /****************************Legacy Power Play Table Definitions **********************/
 
-/* Definitions for ulPowerPlayMiscInfo */
+//Definitions for ulPowerPlayMiscInfo
 #define ATOM_PM_MISCINFO_SPLIT_CLOCK                     0x00000000L
 #define ATOM_PM_MISCINFO_USING_MCLK_SRC                  0x00000001L
 #define ATOM_PM_MISCINFO_USING_SCLK_SRC                  0x00000002L
@@ -4558,8 +5574,8 @@
 
 #define ATOM_PM_MISCINFO_ENGINE_CLOCK_CONTRL_EN          0x00000020L
 #define ATOM_PM_MISCINFO_MEMORY_CLOCK_CONTRL_EN          0x00000040L
-#define ATOM_PM_MISCINFO_PROGRAM_VOLTAGE                 0x00000080L	/* When this bit set, ucVoltageDropIndex is not an index for GPIO pin, but a voltage ID that SW needs program */
-
+#define ATOM_PM_MISCINFO_PROGRAM_VOLTAGE                 0x00000080L  //When this bit set, ucVoltageDropIndex is not an index for GPIO pin, but a voltage ID that SW needs program  
+ 
 #define ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN      0x00000100L
 #define ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN         0x00000200L
 #define ATOM_PM_MISCINFO_ASIC_SLEEP_MODE_EN              0x00000400L
@@ -4569,22 +5585,22 @@
 #define ATOM_PM_MISCINFO_LOW_LCD_REFRESH_RATE            0x00004000L
 
 #define ATOM_PM_MISCINFO_DRIVER_DEFAULT_MODE             0x00008000L
-#define ATOM_PM_MISCINFO_OVER_CLOCK_MODE                 0x00010000L
+#define ATOM_PM_MISCINFO_OVER_CLOCK_MODE                 0x00010000L 
 #define ATOM_PM_MISCINFO_OVER_DRIVE_MODE                 0x00020000L
 #define ATOM_PM_MISCINFO_POWER_SAVING_MODE               0x00040000L
 #define ATOM_PM_MISCINFO_THERMAL_DIODE_MODE              0x00080000L
 
-#define ATOM_PM_MISCINFO_FRAME_MODULATION_MASK           0x00300000L	/* 0-FM Disable, 1-2 level FM, 2-4 level FM, 3-Reserved */
-#define ATOM_PM_MISCINFO_FRAME_MODULATION_SHIFT          20
+#define ATOM_PM_MISCINFO_FRAME_MODULATION_MASK           0x00300000L  //0-FM Disable, 1-2 level FM, 2-4 level FM, 3-Reserved
+#define ATOM_PM_MISCINFO_FRAME_MODULATION_SHIFT          20 
 
 #define ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE                 0x00400000L
 #define ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2      0x00800000L
 #define ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4      0x01000000L
-#define ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN            0x02000000L	/* When set, Dynamic */
-#define ATOM_PM_MISCINFO_DYNAMIC_MC_HOST_BLOCK_EN        0x04000000L	/* When set, Dynamic */
-#define ATOM_PM_MISCINFO_3D_ACCELERATION_EN              0x08000000L	/* When set, This mode is for acceleated 3D mode */
+#define ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN            0x02000000L  //When set, Dynamic 
+#define ATOM_PM_MISCINFO_DYNAMIC_MC_HOST_BLOCK_EN        0x04000000L  //When set, Dynamic
+#define ATOM_PM_MISCINFO_3D_ACCELERATION_EN              0x08000000L  //When set, This mode is for acceleated 3D mode
 
-#define ATOM_PM_MISCINFO_POWERPLAY_SETTINGS_GROUP_MASK   0x70000000L	/* 1-Optimal Battery Life Group, 2-High Battery, 3-Balanced, 4-High Performance, 5- Optimal Performance (Default state with Default clocks) */
+#define ATOM_PM_MISCINFO_POWERPLAY_SETTINGS_GROUP_MASK   0x70000000L  //1-Optimal Battery Life Group, 2-High Battery, 3-Balanced, 4-High Performance, 5- Optimal Performance (Default state with Default clocks) 
 #define ATOM_PM_MISCINFO_POWERPLAY_SETTINGS_GROUP_SHIFT  28
 #define ATOM_PM_MISCINFO_ENABLE_BACK_BIAS                0x80000000L
 
@@ -4594,55 +5610,59 @@
 #define ATOM_PM_MISCINFO2_FS3D_OVERDRIVE_INFO            0x00000008L
 #define ATOM_PM_MISCINFO2_FORCEDLOWPWR_MODE              0x00000010L
 #define ATOM_PM_MISCINFO2_VDDCI_DYNAMIC_VOLTAGE_EN       0x00000020L
-#define ATOM_PM_MISCINFO2_VIDEO_PLAYBACK_CAPABLE         0x00000040L	/* If this bit is set in multi-pp mode, then driver will pack up one with the minior power consumption. */
-								      /* If it's not set in any pp mode, driver will use its default logic to pick a pp mode in video playback */
+#define ATOM_PM_MISCINFO2_VIDEO_PLAYBACK_CAPABLE         0x00000040L  //If this bit is set in multi-pp mode, then driver will pack up one with the minior power consumption. 
+                                                                      //If it's not set in any pp mode, driver will use its default logic to pick a pp mode in video playback
 #define ATOM_PM_MISCINFO2_NOT_VALID_ON_DC                0x00000080L
 #define ATOM_PM_MISCINFO2_STUTTER_MODE_EN                0x00000100L
-#define ATOM_PM_MISCINFO2_UVD_SUPPORT_MODE               0x00000200L
+#define ATOM_PM_MISCINFO2_UVD_SUPPORT_MODE               0x00000200L 
 
-/* ucTableFormatRevision=1 */
-/* ucTableContentRevision=1 */
-typedef struct _ATOM_POWERMODE_INFO {
-	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
-	ULONG ulReserved1;	/*  must set to 0 */
-	ULONG ulReserved2;	/*  must set to 0 */
-	USHORT usEngineClock;
-	USHORT usMemoryClock;
-	UCHAR ucVoltageDropIndex;	/*  index to GPIO table */
-	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
-	UCHAR ucMinTemperature;
-	UCHAR ucMaxTemperature;
-	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
-} ATOM_POWERMODE_INFO;
+//ucTableFormatRevision=1
+//ucTableContentRevision=1
+typedef struct  _ATOM_POWERMODE_INFO
+{
+  ULONG     ulMiscInfo;                 //The power level should be arranged in ascending order
+  ULONG     ulReserved1;                // must set to 0
+  ULONG     ulReserved2;                // must set to 0
+  USHORT    usEngineClock;
+  USHORT    usMemoryClock;
+  UCHAR     ucVoltageDropIndex;         // index to GPIO table
+  UCHAR     ucSelectedPanel_RefreshRate;// panel refresh rate
+  UCHAR     ucMinTemperature;
+  UCHAR     ucMaxTemperature;
+  UCHAR     ucNumPciELanes;             // number of PCIE lanes
+}ATOM_POWERMODE_INFO;
 
-/* ucTableFormatRevision=2 */
-/* ucTableContentRevision=1 */
-typedef struct _ATOM_POWERMODE_INFO_V2 {
-	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
-	ULONG ulMiscInfo2;
-	ULONG ulEngineClock;
-	ULONG ulMemoryClock;
-	UCHAR ucVoltageDropIndex;	/*  index to GPIO table */
-	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
-	UCHAR ucMinTemperature;
-	UCHAR ucMaxTemperature;
-	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
-} ATOM_POWERMODE_INFO_V2;
+//ucTableFormatRevision=2
+//ucTableContentRevision=1
+typedef struct  _ATOM_POWERMODE_INFO_V2
+{
+  ULONG     ulMiscInfo;                 //The power level should be arranged in ascending order
+  ULONG     ulMiscInfo2;                
+  ULONG     ulEngineClock;                
+  ULONG     ulMemoryClock;
+  UCHAR     ucVoltageDropIndex;         // index to GPIO table
+  UCHAR     ucSelectedPanel_RefreshRate;// panel refresh rate
+  UCHAR     ucMinTemperature;
+  UCHAR     ucMaxTemperature;
+  UCHAR     ucNumPciELanes;             // number of PCIE lanes
+}ATOM_POWERMODE_INFO_V2;
 
-/* ucTableFormatRevision=2 */
-/* ucTableContentRevision=2 */
-typedef struct _ATOM_POWERMODE_INFO_V3 {
-	ULONG ulMiscInfo;	/* The power level should be arranged in ascending order */
-	ULONG ulMiscInfo2;
-	ULONG ulEngineClock;
-	ULONG ulMemoryClock;
-	UCHAR ucVoltageDropIndex;	/*  index to Core (VDDC) votage table */
-	UCHAR ucSelectedPanel_RefreshRate;	/*  panel refresh rate */
-	UCHAR ucMinTemperature;
-	UCHAR ucMaxTemperature;
-	UCHAR ucNumPciELanes;	/*  number of PCIE lanes */
-	UCHAR ucVDDCI_VoltageDropIndex;	/*  index to VDDCI votage table */
-} ATOM_POWERMODE_INFO_V3;
+//ucTableFormatRevision=2
+//ucTableContentRevision=2
+typedef struct  _ATOM_POWERMODE_INFO_V3
+{
+  ULONG     ulMiscInfo;                 //The power level should be arranged in ascending order
+  ULONG     ulMiscInfo2;                
+  ULONG     ulEngineClock;                
+  ULONG     ulMemoryClock;
+  UCHAR     ucVoltageDropIndex;         // index to Core (VDDC) votage table
+  UCHAR     ucSelectedPanel_RefreshRate;// panel refresh rate
+  UCHAR     ucMinTemperature;
+  UCHAR     ucMaxTemperature;
+  UCHAR     ucNumPciELanes;             // number of PCIE lanes
+  UCHAR     ucVDDCI_VoltageDropIndex;   // index to VDDCI votage table
+}ATOM_POWERMODE_INFO_V3;
+
 
 #define ATOM_MAX_NUMBEROF_POWER_BLOCK  8
 
@@ -4655,40 +5675,44 @@
 #define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_MUA6649   0x04
 #define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_LM64      0x05
 #define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_F75375    0x06
-#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_ASC7512   0x07	/*  Andigilog */
+#define ATOM_PP_OVERDRIVE_THERMALCONTROLLER_ASC7512   0x07	// Andigilog
 
-typedef struct _ATOM_POWERPLAY_INFO {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucOverdriveThermalController;
-	UCHAR ucOverdriveI2cLine;
-	UCHAR ucOverdriveIntBitmap;
-	UCHAR ucOverdriveControllerAddress;
-	UCHAR ucSizeOfPowerModeEntry;
-	UCHAR ucNumOfPowerModeEntries;
-	ATOM_POWERMODE_INFO asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
-} ATOM_POWERPLAY_INFO;
 
-typedef struct _ATOM_POWERPLAY_INFO_V2 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucOverdriveThermalController;
-	UCHAR ucOverdriveI2cLine;
-	UCHAR ucOverdriveIntBitmap;
-	UCHAR ucOverdriveControllerAddress;
-	UCHAR ucSizeOfPowerModeEntry;
-	UCHAR ucNumOfPowerModeEntries;
-	ATOM_POWERMODE_INFO_V2 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
-} ATOM_POWERPLAY_INFO_V2;
+typedef struct  _ATOM_POWERPLAY_INFO
+{
+  ATOM_COMMON_TABLE_HEADER	sHeader; 
+  UCHAR    ucOverdriveThermalController;
+  UCHAR    ucOverdriveI2cLine;
+  UCHAR    ucOverdriveIntBitmap;
+  UCHAR    ucOverdriveControllerAddress;
+  UCHAR    ucSizeOfPowerModeEntry;
+  UCHAR    ucNumOfPowerModeEntries;
+  ATOM_POWERMODE_INFO asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+}ATOM_POWERPLAY_INFO;
 
-typedef struct _ATOM_POWERPLAY_INFO_V3 {
-	ATOM_COMMON_TABLE_HEADER sHeader;
-	UCHAR ucOverdriveThermalController;
-	UCHAR ucOverdriveI2cLine;
-	UCHAR ucOverdriveIntBitmap;
-	UCHAR ucOverdriveControllerAddress;
-	UCHAR ucSizeOfPowerModeEntry;
-	UCHAR ucNumOfPowerModeEntries;
-	ATOM_POWERMODE_INFO_V3 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
-} ATOM_POWERPLAY_INFO_V3;
+typedef struct  _ATOM_POWERPLAY_INFO_V2
+{
+  ATOM_COMMON_TABLE_HEADER	sHeader; 
+  UCHAR    ucOverdriveThermalController;
+  UCHAR    ucOverdriveI2cLine;
+  UCHAR    ucOverdriveIntBitmap;
+  UCHAR    ucOverdriveControllerAddress;
+  UCHAR    ucSizeOfPowerModeEntry;
+  UCHAR    ucNumOfPowerModeEntries;
+  ATOM_POWERMODE_INFO_V2 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+}ATOM_POWERPLAY_INFO_V2;
+  
+typedef struct  _ATOM_POWERPLAY_INFO_V3
+{
+  ATOM_COMMON_TABLE_HEADER	sHeader; 
+  UCHAR    ucOverdriveThermalController;
+  UCHAR    ucOverdriveI2cLine;
+  UCHAR    ucOverdriveIntBitmap;
+  UCHAR    ucOverdriveControllerAddress;
+  UCHAR    ucSizeOfPowerModeEntry;
+  UCHAR    ucNumOfPowerModeEntries;
+  ATOM_POWERMODE_INFO_V3 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
+}ATOM_POWERPLAY_INFO_V3;
 
 /* New PPlib */
 /**************************************************************************/
@@ -4873,40 +5897,42 @@
       UCHAR  ucMaxHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}
       UCHAR  ucMinHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requriement.
       USHORT usHTLinkFreq;                // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200).
-      ULONG  ulFlags;
+      ULONG  ulFlags; 
 } ATOM_PPLIB_RS780_CLOCK_INFO;
 
-#define ATOM_PPLIB_RS780_VOLTAGE_NONE       0
-#define ATOM_PPLIB_RS780_VOLTAGE_LOW        1
-#define ATOM_PPLIB_RS780_VOLTAGE_HIGH       2
-#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE   3
+#define ATOM_PPLIB_RS780_VOLTAGE_NONE       0 
+#define ATOM_PPLIB_RS780_VOLTAGE_LOW        1 
+#define ATOM_PPLIB_RS780_VOLTAGE_HIGH       2 
+#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE   3 
 
 #define ATOM_PPLIB_RS780_SPMCLK_NONE        0   // We cannot change the side port memory clock, leave it as it is.
 #define ATOM_PPLIB_RS780_SPMCLK_LOW         1
 #define ATOM_PPLIB_RS780_SPMCLK_HIGH        2
 
-#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE       0
-#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW        1
-#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH       2
+#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE       0 
+#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW        1 
+#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH       2 
 
 /**************************************************************************/
 
-/*  Following definitions are for compatiblity issue in different SW components. */
+
+// Following definitions are for compatiblity issue in different SW components. 
 #define ATOM_MASTER_DATA_TABLE_REVISION   0x01
-#define Object_Info												Object_Header
+#define Object_Info												Object_Header			
 #define	AdjustARB_SEQ											MC_InitParameter
 #define	VRAM_GPIO_DetectionInfo						VoltageObjectInfo
-#define	ASIC_VDDCI_Info                   ASIC_ProfilingInfo
+#define	ASIC_VDDCI_Info                   ASIC_ProfilingInfo														
 #define ASIC_MVDDQ_Info										MemoryTrainingInfo
-#define SS_Info                           PPLL_SS_Info
+#define SS_Info                           PPLL_SS_Info                      
 #define ASIC_MVDDC_Info                   ASIC_InternalSS_Info
 #define DispDevicePriorityInfo						SaveRestoreInfo
 #define DispOutInfo												TV_VideoMode
 
+
 #define ATOM_ENCODER_OBJECT_TABLE         ATOM_OBJECT_TABLE
 #define ATOM_CONNECTOR_OBJECT_TABLE       ATOM_OBJECT_TABLE
 
-/* New device naming, remove them when both DAL/VBIOS is ready */
+//New device naming, remove them when both DAL/VBIOS is ready
 #define DFP2I_OUTPUT_CONTROL_PARAMETERS    CRT1_OUTPUT_CONTROL_PARAMETERS
 #define DFP2I_OUTPUT_CONTROL_PS_ALLOCATION DFP2I_OUTPUT_CONTROL_PARAMETERS
 
@@ -4921,7 +5947,7 @@
 
 #define ATOM_DEVICE_DFP1I_INDEX            ATOM_DEVICE_DFP1_INDEX
 #define ATOM_DEVICE_DFP1X_INDEX            ATOM_DEVICE_DFP2_INDEX
-
+ 
 #define ATOM_DEVICE_DFP2I_INDEX            0x00000009
 #define ATOM_DEVICE_DFP2I_SUPPORT          (0x1L << ATOM_DEVICE_DFP2I_INDEX)
 
@@ -4939,7 +5965,7 @@
 
 #define ATOM_S3_DFP2I_ACTIVEb1             0x02
 
-#define ATOM_S3_DFP1I_ACTIVE               ATOM_S3_DFP1_ACTIVE
+#define ATOM_S3_DFP1I_ACTIVE               ATOM_S3_DFP1_ACTIVE 
 #define ATOM_S3_DFP1X_ACTIVE               ATOM_S3_DFP2_ACTIVE
 
 #define ATOM_S3_DFP2I_ACTIVE               0x00000200L
@@ -4958,14 +5984,14 @@
 #define ATOM_S6_ACC_REQ_DFP2Ib3            0x02
 #define ATOM_S6_ACC_REQ_DFP2I              0x02000000L
 
-#define TMDS1XEncoderControl               DVOEncoderControl
+#define TMDS1XEncoderControl               DVOEncoderControl           
 #define DFP1XOutputControl                 DVOOutputControl
 
 #define ExternalDFPOutputControl           DFP1XOutputControl
 #define EnableExternalTMDS_Encoder         TMDS1XEncoderControl
 
 #define DFP1IOutputControl                 TMDSAOutputControl
-#define DFP2IOutputControl                 LVTMAOutputControl
+#define DFP2IOutputControl                 LVTMAOutputControl      
 
 #define DAC1_ENCODER_CONTROL_PARAMETERS    DAC_ENCODER_CONTROL_PARAMETERS
 #define DAC1_ENCODER_CONTROL_PS_ALLOCATION DAC_ENCODER_CONTROL_PS_ALLOCATION
@@ -4974,7 +6000,7 @@
 #define DAC2_ENCODER_CONTROL_PS_ALLOCATION DAC_ENCODER_CONTROL_PS_ALLOCATION
 
 #define ucDac1Standard  ucDacStandard
-#define ucDac2Standard  ucDacStandard
+#define ucDac2Standard  ucDacStandard  
 
 #define TMDS1EncoderControl TMDSAEncoderControl
 #define TMDS2EncoderControl LVTMAEncoderControl
@@ -4984,12 +6010,56 @@
 #define CRT1OutputControl   DAC1OutputControl
 #define CRT2OutputControl   DAC2OutputControl
 
-/* These two lines will be removed for sure in a few days, will follow up with Michael V. */
+//These two lines will be removed for sure in a few days, will follow up with Michael V.
 #define EnableLVDS_SS   EnableSpreadSpectrumOnPPLL
-#define ENABLE_LVDS_SS_PARAMETERS_V3  ENABLE_SPREAD_SPECTRUM_ON_PPLL
+#define ENABLE_LVDS_SS_PARAMETERS_V3  ENABLE_SPREAD_SPECTRUM_ON_PPLL  
+
+//#define ATOM_S2_CRT1_DPMS_STATE         0x00010000L
+//#define ATOM_S2_LCD1_DPMS_STATE	        ATOM_S2_CRT1_DPMS_STATE
+//#define ATOM_S2_TV1_DPMS_STATE          ATOM_S2_CRT1_DPMS_STATE
+//#define ATOM_S2_DFP1_DPMS_STATE         ATOM_S2_CRT1_DPMS_STATE
+//#define ATOM_S2_CRT2_DPMS_STATE         ATOM_S2_CRT1_DPMS_STATE
+
+#define ATOM_S6_ACC_REQ_TV2             0x00400000L
+#define ATOM_DEVICE_TV2_INDEX           0x00000006
+#define ATOM_DEVICE_TV2_SUPPORT         (0x1L << ATOM_DEVICE_TV2_INDEX)
+#define ATOM_S0_TV2                     0x00100000L
+#define ATOM_S3_TV2_ACTIVE              ATOM_S3_DFP6_ACTIVE
+#define ATOM_S3_TV2_CRTC_ACTIVE         ATOM_S3_DFP6_CRTC_ACTIVE
+
+//
+#define ATOM_S2_CRT1_DPMS_STATE         0x00010000L
+#define ATOM_S2_LCD1_DPMS_STATE	        0x00020000L
+#define ATOM_S2_TV1_DPMS_STATE          0x00040000L
+#define ATOM_S2_DFP1_DPMS_STATE         0x00080000L
+#define ATOM_S2_CRT2_DPMS_STATE         0x00100000L
+#define ATOM_S2_LCD2_DPMS_STATE         0x00200000L
+#define ATOM_S2_TV2_DPMS_STATE          0x00400000L
+#define ATOM_S2_DFP2_DPMS_STATE         0x00800000L
+#define ATOM_S2_CV_DPMS_STATE           0x01000000L
+#define ATOM_S2_DFP3_DPMS_STATE					0x02000000L
+#define ATOM_S2_DFP4_DPMS_STATE					0x04000000L
+#define ATOM_S2_DFP5_DPMS_STATE					0x08000000L
+
+#define ATOM_S2_CRT1_DPMS_STATEb2       0x01
+#define ATOM_S2_LCD1_DPMS_STATEb2       0x02
+#define ATOM_S2_TV1_DPMS_STATEb2        0x04
+#define ATOM_S2_DFP1_DPMS_STATEb2       0x08
+#define ATOM_S2_CRT2_DPMS_STATEb2       0x10
+#define ATOM_S2_LCD2_DPMS_STATEb2       0x20
+#define ATOM_S2_TV2_DPMS_STATEb2        0x40
+#define ATOM_S2_DFP2_DPMS_STATEb2       0x80
+#define ATOM_S2_CV_DPMS_STATEb3         0x01
+#define ATOM_S2_DFP3_DPMS_STATEb3				0x02
+#define ATOM_S2_DFP4_DPMS_STATEb3				0x04
+#define ATOM_S2_DFP5_DPMS_STATEb3				0x08
+
+#define ATOM_S3_ASIC_GUI_ENGINE_HUNGb3	0x20
+#define ATOM_S3_ALLOW_FAST_PWR_SWITCHb3 0x40
+#define ATOM_S3_RQST_GPU_USE_MIN_PWRb3  0x80
 
 /*********************************************************************************/
 
-#pragma pack()			/*  BIOS data must use byte aligment */
+#pragma pack() // BIOS data must use byte aligment
 
 #endif /* _ATOMBIOS_H */
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index af464e35..dd9fdf5 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -245,21 +245,25 @@
 
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
-		atombios_enable_crtc(crtc, 1);
+		atombios_enable_crtc(crtc, ATOM_ENABLE);
 		if (ASIC_IS_DCE3(rdev))
-			atombios_enable_crtc_memreq(crtc, 1);
-		atombios_blank_crtc(crtc, 0);
-		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
+		atombios_blank_crtc(crtc, ATOM_DISABLE);
+		/* XXX re-enable when interrupt support is added */
+		if (!ASIC_IS_DCE4(rdev))
+			drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
-		atombios_blank_crtc(crtc, 1);
+		/* XXX re-enable when interrupt support is added */
+		if (!ASIC_IS_DCE4(rdev))
+			drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
+		atombios_blank_crtc(crtc, ATOM_ENABLE);
 		if (ASIC_IS_DCE3(rdev))
-			atombios_enable_crtc_memreq(crtc, 0);
-		atombios_enable_crtc(crtc, 0);
+			atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
+		atombios_enable_crtc(crtc, ATOM_DISABLE);
 		break;
 	}
 }
@@ -349,6 +353,11 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
+union atom_enable_ss {
+	ENABLE_LVDS_SS_PARAMETERS legacy;
+	ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION v1;
+};
+
 static void atombios_set_ss(struct drm_crtc *crtc, int enable)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
@@ -358,11 +367,14 @@
 	struct radeon_encoder *radeon_encoder = NULL;
 	struct radeon_encoder_atom_dig *dig = NULL;
 	int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL);
-	ENABLE_SPREAD_SPECTRUM_ON_PPLL_PS_ALLOCATION args;
-	ENABLE_LVDS_SS_PARAMETERS legacy_args;
+	union atom_enable_ss args;
 	uint16_t percentage = 0;
 	uint8_t type = 0, step = 0, delay = 0, range = 0;
 
+	/* XXX add ss support for DCE4 */
+	if (ASIC_IS_DCE4(rdev))
+		return;
+
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc) {
 			radeon_encoder = to_radeon_encoder(encoder);
@@ -386,29 +398,28 @@
 	if (!radeon_encoder)
 		return;
 
+	memset(&args, 0, sizeof(args));
 	if (ASIC_IS_AVIVO(rdev)) {
-		memset(&args, 0, sizeof(args));
-		args.usSpreadSpectrumPercentage = cpu_to_le16(percentage);
-		args.ucSpreadSpectrumType = type;
-		args.ucSpreadSpectrumStep = step;
-		args.ucSpreadSpectrumDelay = delay;
-		args.ucSpreadSpectrumRange = range;
-		args.ucPpll = radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
-		args.ucEnable = enable;
-		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+		args.v1.usSpreadSpectrumPercentage = cpu_to_le16(percentage);
+		args.v1.ucSpreadSpectrumType = type;
+		args.v1.ucSpreadSpectrumStep = step;
+		args.v1.ucSpreadSpectrumDelay = delay;
+		args.v1.ucSpreadSpectrumRange = range;
+		args.v1.ucPpll = radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+		args.v1.ucEnable = enable;
 	} else {
-		memset(&legacy_args, 0, sizeof(legacy_args));
-		legacy_args.usSpreadSpectrumPercentage = cpu_to_le16(percentage);
-		legacy_args.ucSpreadSpectrumType = type;
-		legacy_args.ucSpreadSpectrumStepSize_Delay = (step & 3) << 2;
-		legacy_args.ucSpreadSpectrumStepSize_Delay |= (delay & 7) << 4;
-		legacy_args.ucEnable = enable;
-		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&legacy_args);
+		args.legacy.usSpreadSpectrumPercentage = cpu_to_le16(percentage);
+		args.legacy.ucSpreadSpectrumType = type;
+		args.legacy.ucSpreadSpectrumStepSize_Delay = (step & 3) << 2;
+		args.legacy.ucSpreadSpectrumStepSize_Delay |= (delay & 7) << 4;
+		args.legacy.ucEnable = enable;
 	}
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
 union adjust_pixel_clock {
 	ADJUST_DISPLAY_PLL_PS_ALLOCATION v1;
+	ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3 v3;
 };
 
 static u32 atombios_adjust_pll(struct drm_crtc *crtc,
@@ -420,10 +431,24 @@
 	struct drm_encoder *encoder = NULL;
 	struct radeon_encoder *radeon_encoder = NULL;
 	u32 adjusted_clock = mode->clock;
+	int encoder_mode = 0;
 
 	/* reset the pll flags */
 	pll->flags = 0;
 
+	/* select the PLL algo */
+	if (ASIC_IS_AVIVO(rdev)) {
+		if (radeon_new_pll == 0)
+			pll->algo = PLL_ALGO_LEGACY;
+		else
+			pll->algo = PLL_ALGO_NEW;
+	} else {
+		if (radeon_new_pll == 1)
+			pll->algo = PLL_ALGO_NEW;
+		else
+			pll->algo = PLL_ALGO_LEGACY;
+	}
+
 	if (ASIC_IS_AVIVO(rdev)) {
 		if ((rdev->family == CHIP_RS600) ||
 		    (rdev->family == CHIP_RS690) ||
@@ -448,10 +473,16 @@
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc) {
 			radeon_encoder = to_radeon_encoder(encoder);
+			encoder_mode = atombios_get_encoder_mode(encoder);
 			if (ASIC_IS_AVIVO(rdev)) {
 				/* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
 				if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
 					adjusted_clock = mode->clock * 2;
+				/* LVDS PLL quirks */
+				if (encoder->encoder_type == DRM_MODE_ENCODER_LVDS) {
+					struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+					pll->algo = dig->pll_algo;
+				}
 			} else {
 				if (encoder->encoder_type != DRM_MODE_ENCODER_DAC)
 					pll->flags |= RADEON_PLL_NO_ODD_POST_DIV;
@@ -468,14 +499,9 @@
 	 */
 	if (ASIC_IS_DCE3(rdev)) {
 		union adjust_pixel_clock args;
-		struct radeon_encoder_atom_dig *dig;
 		u8 frev, crev;
 		int index;
 
-		if (!radeon_encoder->enc_priv)
-			return adjusted_clock;
-		dig = radeon_encoder->enc_priv;
-
 		index = GetIndexIntoMasterTable(COMMAND, AdjustDisplayPll);
 		atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev,
 				      &crev);
@@ -489,12 +515,51 @@
 			case 2:
 				args.v1.usPixelClock = cpu_to_le16(mode->clock / 10);
 				args.v1.ucTransmitterID = radeon_encoder->encoder_id;
-				args.v1.ucEncodeMode = atombios_get_encoder_mode(encoder);
+				args.v1.ucEncodeMode = encoder_mode;
 
 				atom_execute_table(rdev->mode_info.atom_context,
 						   index, (uint32_t *)&args);
 				adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10;
 				break;
+			case 3:
+				args.v3.sInput.usPixelClock = cpu_to_le16(mode->clock / 10);
+				args.v3.sInput.ucTransmitterID = radeon_encoder->encoder_id;
+				args.v3.sInput.ucEncodeMode = encoder_mode;
+				args.v3.sInput.ucDispPllConfig = 0;
+				if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+					struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+
+					if (encoder_mode == ATOM_ENCODER_MODE_DP)
+						args.v3.sInput.ucDispPllConfig |=
+							DISPPLL_CONFIG_COHERENT_MODE;
+					else {
+						if (dig->coherent_mode)
+							args.v3.sInput.ucDispPllConfig |=
+								DISPPLL_CONFIG_COHERENT_MODE;
+						if (mode->clock > 165000)
+							args.v3.sInput.ucDispPllConfig |=
+								DISPPLL_CONFIG_DUAL_LINK;
+					}
+				} else if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
+					/* may want to enable SS on DP/eDP eventually */
+					args.v3.sInput.ucDispPllConfig |=
+						DISPPLL_CONFIG_SS_ENABLE;
+					if (mode->clock > 165000)
+						args.v3.sInput.ucDispPllConfig |=
+							DISPPLL_CONFIG_DUAL_LINK;
+				}
+				atom_execute_table(rdev->mode_info.atom_context,
+						   index, (uint32_t *)&args);
+				adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
+				if (args.v3.sOutput.ucRefDiv) {
+					pll->flags |= RADEON_PLL_USE_REF_DIV;
+					pll->reference_div = args.v3.sOutput.ucRefDiv;
+				}
+				if (args.v3.sOutput.ucPostDiv) {
+					pll->flags |= RADEON_PLL_USE_POST_DIV;
+					pll->post_div = args.v3.sOutput.ucPostDiv;
+				}
+				break;
 			default:
 				DRM_ERROR("Unknown table version %d %d\n", frev, crev);
 				return adjusted_clock;
@@ -513,9 +578,47 @@
 	PIXEL_CLOCK_PARAMETERS v1;
 	PIXEL_CLOCK_PARAMETERS_V2 v2;
 	PIXEL_CLOCK_PARAMETERS_V3 v3;
+	PIXEL_CLOCK_PARAMETERS_V5 v5;
 };
 
-void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
+static void atombios_crtc_set_dcpll(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	u8 frev, crev;
+	int index;
+	union set_pixel_clock args;
+
+	memset(&args, 0, sizeof(args));
+
+	index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
+	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev,
+			      &crev);
+
+	switch (frev) {
+	case 1:
+		switch (crev) {
+		case 5:
+			/* if the default dcpll clock is specified,
+			 * SetPixelClock provides the dividers
+			 */
+			args.v5.ucCRTC = ATOM_CRTC_INVALID;
+			args.v5.usPixelClock = rdev->clock.default_dispclk;
+			args.v5.ucPpll = ATOM_DCPLL;
+			break;
+		default:
+			DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+			return;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version %d %d\n", frev, crev);
+		return;
+	}
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+}
+
+static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
@@ -529,12 +632,14 @@
 	u32 ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0;
 	struct radeon_pll *pll;
 	u32 adjusted_clock;
+	int encoder_mode = 0;
 
 	memset(&args, 0, sizeof(args));
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc) {
 			radeon_encoder = to_radeon_encoder(encoder);
+			encoder_mode = atombios_get_encoder_mode(encoder);
 			break;
 		}
 	}
@@ -542,26 +647,24 @@
 	if (!radeon_encoder)
 		return;
 
-	if (radeon_crtc->crtc_id == 0)
+	switch (radeon_crtc->pll_id) {
+	case ATOM_PPLL1:
 		pll = &rdev->clock.p1pll;
-	else
+		break;
+	case ATOM_PPLL2:
 		pll = &rdev->clock.p2pll;
+		break;
+	case ATOM_DCPLL:
+	case ATOM_PPLL_INVALID:
+		pll = &rdev->clock.dcpll;
+		break;
+	}
 
 	/* adjust pixel clock as needed */
 	adjusted_clock = atombios_adjust_pll(crtc, mode, pll);
 
-	if (ASIC_IS_AVIVO(rdev)) {
-		if (radeon_new_pll)
-			radeon_compute_pll_avivo(pll, adjusted_clock, &pll_clock,
-						 &fb_div, &frac_fb_div,
-						 &ref_div, &post_div);
-		else
-			radeon_compute_pll(pll, adjusted_clock, &pll_clock,
-					   &fb_div, &frac_fb_div,
-					   &ref_div, &post_div);
-	} else
-		radeon_compute_pll(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
-				   &ref_div, &post_div);
+	radeon_compute_pll(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div,
+			   &ref_div, &post_div);
 
 	index = GetIndexIntoMasterTable(COMMAND, SetPixelClock);
 	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev,
@@ -576,8 +679,7 @@
 			args.v1.usFbDiv = cpu_to_le16(fb_div);
 			args.v1.ucFracFbDiv = frac_fb_div;
 			args.v1.ucPostDiv = post_div;
-			args.v1.ucPpll =
-			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+			args.v1.ucPpll = radeon_crtc->pll_id;
 			args.v1.ucCRTC = radeon_crtc->crtc_id;
 			args.v1.ucRefDivSrc = 1;
 			break;
@@ -587,8 +689,7 @@
 			args.v2.usFbDiv = cpu_to_le16(fb_div);
 			args.v2.ucFracFbDiv = frac_fb_div;
 			args.v2.ucPostDiv = post_div;
-			args.v2.ucPpll =
-			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
+			args.v2.ucPpll = radeon_crtc->pll_id;
 			args.v2.ucCRTC = radeon_crtc->crtc_id;
 			args.v2.ucRefDivSrc = 1;
 			break;
@@ -598,12 +699,22 @@
 			args.v3.usFbDiv = cpu_to_le16(fb_div);
 			args.v3.ucFracFbDiv = frac_fb_div;
 			args.v3.ucPostDiv = post_div;
-			args.v3.ucPpll =
-			    radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1;
-			args.v3.ucMiscInfo = (radeon_crtc->crtc_id << 2);
+			args.v3.ucPpll = radeon_crtc->pll_id;
+			args.v3.ucMiscInfo = (radeon_crtc->pll_id << 2);
 			args.v3.ucTransmitterId = radeon_encoder->encoder_id;
-			args.v3.ucEncoderMode =
-			    atombios_get_encoder_mode(encoder);
+			args.v3.ucEncoderMode = encoder_mode;
+			break;
+		case 5:
+			args.v5.ucCRTC = radeon_crtc->crtc_id;
+			args.v5.usPixelClock = cpu_to_le16(mode->clock / 10);
+			args.v5.ucRefDiv = ref_div;
+			args.v5.usFbDiv = cpu_to_le16(fb_div);
+			args.v5.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
+			args.v5.ucPostDiv = post_div;
+			args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */
+			args.v5.ucTransmitterID = radeon_encoder->encoder_id;
+			args.v5.ucEncoderMode = encoder_mode;
+			args.v5.ucPpll = radeon_crtc->pll_id;
 			break;
 		default:
 			DRM_ERROR("Unknown table version %d %d\n", frev, crev);
@@ -618,6 +729,140 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
+static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y,
+				   struct drm_framebuffer *old_fb)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_framebuffer *radeon_fb;
+	struct drm_gem_object *obj;
+	struct radeon_bo *rbo;
+	uint64_t fb_location;
+	uint32_t fb_format, fb_pitch_pixels, tiling_flags;
+	int r;
+
+	/* no fb bound */
+	if (!crtc->fb) {
+		DRM_DEBUG("No FB bound\n");
+		return 0;
+	}
+
+	radeon_fb = to_radeon_framebuffer(crtc->fb);
+
+	/* Pin framebuffer & get tilling informations */
+	obj = radeon_fb->obj;
+	rbo = obj->driver_private;
+	r = radeon_bo_reserve(rbo, false);
+	if (unlikely(r != 0))
+		return r;
+	r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location);
+	if (unlikely(r != 0)) {
+		radeon_bo_unreserve(rbo);
+		return -EINVAL;
+	}
+	radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL);
+	radeon_bo_unreserve(rbo);
+
+	switch (crtc->fb->bits_per_pixel) {
+	case 8:
+		fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_8BPP) |
+			     EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_INDEXED));
+		break;
+	case 15:
+		fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_16BPP) |
+			     EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB1555));
+		break;
+	case 16:
+		fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_16BPP) |
+			     EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB565));
+		break;
+	case 24:
+	case 32:
+		fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_32BPP) |
+			     EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB8888));
+		break;
+	default:
+		DRM_ERROR("Unsupported screen depth %d\n",
+			  crtc->fb->bits_per_pixel);
+		return -EINVAL;
+	}
+
+	switch (radeon_crtc->crtc_id) {
+	case 0:
+		WREG32(AVIVO_D1VGA_CONTROL, 0);
+		break;
+	case 1:
+		WREG32(AVIVO_D2VGA_CONTROL, 0);
+		break;
+	case 2:
+		WREG32(EVERGREEN_D3VGA_CONTROL, 0);
+		break;
+	case 3:
+		WREG32(EVERGREEN_D4VGA_CONTROL, 0);
+		break;
+	case 4:
+		WREG32(EVERGREEN_D5VGA_CONTROL, 0);
+		break;
+	case 5:
+		WREG32(EVERGREEN_D6VGA_CONTROL, 0);
+		break;
+	default:
+		break;
+	}
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
+	       upper_32_bits(fb_location));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
+	       upper_32_bits(fb_location));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+	       (u32)fb_location & EVERGREEN_GRPH_SURFACE_ADDRESS_MASK);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
+	       (u32) fb_location & EVERGREEN_GRPH_SURFACE_ADDRESS_MASK);
+	WREG32(EVERGREEN_GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format);
+
+	WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_GRPH_X_START + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_GRPH_Y_START + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_GRPH_X_END + radeon_crtc->crtc_offset, crtc->fb->width);
+	WREG32(EVERGREEN_GRPH_Y_END + radeon_crtc->crtc_offset, crtc->fb->height);
+
+	fb_pitch_pixels = crtc->fb->pitch / (crtc->fb->bits_per_pixel / 8);
+	WREG32(EVERGREEN_GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels);
+	WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 1);
+
+	WREG32(EVERGREEN_DESKTOP_HEIGHT + radeon_crtc->crtc_offset,
+	       crtc->mode.vdisplay);
+	x &= ~3;
+	y &= ~1;
+	WREG32(EVERGREEN_VIEWPORT_START + radeon_crtc->crtc_offset,
+	       (x << 16) | y);
+	WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset,
+	       (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay);
+
+	if (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE)
+		WREG32(EVERGREEN_DATA_FORMAT + radeon_crtc->crtc_offset,
+		       EVERGREEN_INTERLEAVE_EN);
+	else
+		WREG32(EVERGREEN_DATA_FORMAT + radeon_crtc->crtc_offset, 0);
+
+	if (old_fb && old_fb != crtc->fb) {
+		radeon_fb = to_radeon_framebuffer(old_fb);
+		rbo = radeon_fb->obj->driver_private;
+		r = radeon_bo_reserve(rbo, false);
+		if (unlikely(r != 0))
+			return r;
+		radeon_bo_unpin(rbo);
+		radeon_bo_unreserve(rbo);
+	}
+
+	/* Bytes per pixel may have changed */
+	radeon_bandwidth_update(rdev);
+
+	return 0;
+}
+
 static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 			       struct drm_framebuffer *old_fb)
 {
@@ -755,7 +1000,9 @@
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
-	if (ASIC_IS_AVIVO(rdev))
+	if (ASIC_IS_DCE4(rdev))
+		return evergreen_crtc_set_base(crtc, x, y, old_fb);
+	else if (ASIC_IS_AVIVO(rdev))
 		return avivo_crtc_set_base(crtc, x, y, old_fb);
 	else
 		return radeon_crtc_set_base(crtc, x, y, old_fb);
@@ -785,6 +1032,46 @@
 	}
 }
 
+static int radeon_atom_pick_pll(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *test_encoder;
+	struct drm_crtc *test_crtc;
+	uint32_t pll_in_use = 0;
+
+	if (ASIC_IS_DCE4(rdev)) {
+		/* if crtc is driving DP and we have an ext clock, use that */
+		list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) {
+			if (test_encoder->crtc && (test_encoder->crtc == crtc)) {
+				if (atombios_get_encoder_mode(test_encoder) == ATOM_ENCODER_MODE_DP) {
+					if (rdev->clock.dp_extclk)
+						return ATOM_PPLL_INVALID;
+				}
+			}
+		}
+
+		/* otherwise, pick one of the plls */
+		list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) {
+			struct radeon_crtc *radeon_test_crtc;
+
+			if (crtc == test_crtc)
+				continue;
+
+			radeon_test_crtc = to_radeon_crtc(test_crtc);
+			if ((radeon_test_crtc->pll_id >= ATOM_PPLL1) &&
+			    (radeon_test_crtc->pll_id <= ATOM_PPLL2))
+				pll_in_use |= (1 << radeon_test_crtc->pll_id);
+		}
+		if (!(pll_in_use & 1))
+			return ATOM_PPLL1;
+		return ATOM_PPLL2;
+	} else
+		return radeon_crtc->crtc_id;
+
+}
+
 int atombios_crtc_mode_set(struct drm_crtc *crtc,
 			   struct drm_display_mode *mode,
 			   struct drm_display_mode *adjusted_mode,
@@ -796,19 +1083,27 @@
 
 	/* TODO color tiling */
 
+	/* pick pll */
+	radeon_crtc->pll_id = radeon_atom_pick_pll(crtc);
+
 	atombios_set_ss(crtc, 0);
+	/* always set DCPLL */
+	if (ASIC_IS_DCE4(rdev))
+		atombios_crtc_set_dcpll(crtc);
 	atombios_crtc_set_pll(crtc, adjusted_mode);
 	atombios_set_ss(crtc, 1);
-	atombios_crtc_set_timing(crtc, adjusted_mode);
 
-	if (ASIC_IS_AVIVO(rdev))
-		atombios_crtc_set_base(crtc, x, y, old_fb);
+	if (ASIC_IS_DCE4(rdev))
+		atombios_set_crtc_dtd_timing(crtc, adjusted_mode);
+	else if (ASIC_IS_AVIVO(rdev))
+		atombios_crtc_set_timing(crtc, adjusted_mode);
 	else {
+		atombios_crtc_set_timing(crtc, adjusted_mode);
 		if (radeon_crtc->crtc_id == 0)
 			atombios_set_crtc_dtd_timing(crtc, adjusted_mode);
-		atombios_crtc_set_base(crtc, x, y, old_fb);
 		radeon_legacy_atom_fixup(crtc);
 	}
+	atombios_crtc_set_base(crtc, x, y, old_fb);
 	atombios_overscan_setup(crtc, mode, adjusted_mode);
 	atombios_scaler_setup(crtc);
 	return 0;
@@ -825,14 +1120,14 @@
 
 static void atombios_crtc_prepare(struct drm_crtc *crtc)
 {
-	atombios_lock_crtc(crtc, 1);
+	atombios_lock_crtc(crtc, ATOM_ENABLE);
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 }
 
 static void atombios_crtc_commit(struct drm_crtc *crtc)
 {
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
-	atombios_lock_crtc(crtc, 0);
+	atombios_lock_crtc(crtc, ATOM_DISABLE);
 }
 
 static const struct drm_crtc_helper_funcs atombios_helper_funcs = {
@@ -848,8 +1143,37 @@
 void radeon_atombios_init_crtc(struct drm_device *dev,
 			       struct radeon_crtc *radeon_crtc)
 {
-	if (radeon_crtc->crtc_id == 1)
-		radeon_crtc->crtc_offset =
-		    AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL;
+	struct radeon_device *rdev = dev->dev_private;
+
+	if (ASIC_IS_DCE4(rdev)) {
+		switch (radeon_crtc->crtc_id) {
+		case 0:
+		default:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC0_REGISTER_OFFSET;
+			break;
+		case 1:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC1_REGISTER_OFFSET;
+			break;
+		case 2:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC2_REGISTER_OFFSET;
+			break;
+		case 3:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC3_REGISTER_OFFSET;
+			break;
+		case 4:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC4_REGISTER_OFFSET;
+			break;
+		case 5:
+			radeon_crtc->crtc_offset = EVERGREEN_CRTC5_REGISTER_OFFSET;
+			break;
+		}
+	} else {
+		if (radeon_crtc->crtc_id == 1)
+			radeon_crtc->crtc_offset =
+				AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL;
+		else
+			radeon_crtc->crtc_offset = 0;
+	}
+	radeon_crtc->pll_id = -1;
 	drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs);
 }
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 99915a6..8a133bd 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -321,6 +321,10 @@
 		train_set[lane] = v | p;
 }
 
+union aux_channel_transaction {
+	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
+	PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
+};
 
 /* radeon aux chan functions */
 bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes,
@@ -329,7 +333,7 @@
 {
 	struct drm_device *dev = chan->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+	union aux_channel_transaction args;
 	int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
 	unsigned char *base;
 	int retry_count = 0;
@@ -341,31 +345,33 @@
 retry:
 	memcpy(base, req_bytes, num_bytes);
 
-	args.lpAuxRequest = 0;
-	args.lpDataOut = 16;
-	args.ucDataOutLen = 0;
-	args.ucChannelID = chan->rec.i2c_id;
-	args.ucDelay = delay / 10;
+	args.v1.lpAuxRequest = 0;
+	args.v1.lpDataOut = 16;
+	args.v1.ucDataOutLen = 0;
+	args.v1.ucChannelID = chan->rec.i2c_id;
+	args.v1.ucDelay = delay / 10;
+	if (ASIC_IS_DCE4(rdev))
+		args.v2.ucHPD_ID = chan->rec.hpd_id;
 
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 
-	if (args.ucReplyStatus && !args.ucDataOutLen) {
-		if (args.ucReplyStatus == 0x20 && retry_count++ < 10)
+	if (args.v1.ucReplyStatus && !args.v1.ucDataOutLen) {
+		if (args.v1.ucReplyStatus == 0x20 && retry_count++ < 10)
 			goto retry;
 		DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n",
 			  req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3],
-			  chan->rec.i2c_id, args.ucReplyStatus, retry_count);
+			  chan->rec.i2c_id, args.v1.ucReplyStatus, retry_count);
 		return false;
 	}
 
-	if (args.ucDataOutLen && read_byte && read_buf_len) {
-		if (read_buf_len < args.ucDataOutLen) {
+	if (args.v1.ucDataOutLen && read_byte && read_buf_len) {
+		if (read_buf_len < args.v1.ucDataOutLen) {
 			DRM_ERROR("Buffer to small for return answer %d %d\n",
-				  read_buf_len, args.ucDataOutLen);
+				  read_buf_len, args.v1.ucDataOutLen);
 			return false;
 		}
 		{
-			int len = min(read_buf_len, args.ucDataOutLen);
+			int len = min(read_buf_len, args.v1.ucDataOutLen);
 			memcpy(read_byte, base + 16, len);
 		}
 	}
@@ -626,12 +632,19 @@
 	dp_set_link_bw_lanes(radeon_connector, link_configuration);
 	/* disable downspread on the sink */
 	dp_set_downspread(radeon_connector, 0);
-	/* start training on the source */
-	radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_START,
-				  dig_connector->dp_clock, enc_id, 0);
-	/* set training pattern 1 on the source */
-	radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
-				  dig_connector->dp_clock, enc_id, 0);
+	if (ASIC_IS_DCE4(rdev)) {
+		/* start training on the source */
+		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_START);
+		/* set training pattern 1 on the source */
+		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1);
+	} else {
+		/* start training on the source */
+		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_START,
+					  dig_connector->dp_clock, enc_id, 0);
+		/* set training pattern 1 on the source */
+		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
+					  dig_connector->dp_clock, enc_id, 0);
+	}
 
 	/* set initial vs/emph */
 	memset(train_set, 0, 4);
@@ -691,8 +704,11 @@
 	/* set training pattern 2 on the sink */
 	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_2);
 	/* set training pattern 2 on the source */
-	radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
-				  dig_connector->dp_clock, enc_id, 1);
+	if (ASIC_IS_DCE4(rdev))
+		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2);
+	else
+		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
+					  dig_connector->dp_clock, enc_id, 1);
 
 	/* channel equalization loop */
 	tries = 0;
@@ -729,7 +745,11 @@
 			  >> DP_TRAIN_PRE_EMPHASIS_SHIFT);
 
 	/* disable the training pattern on the sink */
-	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_DISABLE);
+	if (ASIC_IS_DCE4(rdev))
+		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE);
+	else
+		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_COMPLETE,
+					  dig_connector->dp_clock, enc_id, 0);
 
 	radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_COMPLETE,
 				  dig_connector->dp_clock, enc_id, 0);
diff --git a/drivers/gpu/drm/radeon/avivod.h b/drivers/gpu/drm/radeon/avivod.h
index d4e6e6e..3c391e7 100644
--- a/drivers/gpu/drm/radeon/avivod.h
+++ b/drivers/gpu/drm/radeon/avivod.h
@@ -30,11 +30,13 @@
 
 #define	D1CRTC_CONTROL					0x6080
 #define		CRTC_EN						(1 << 0)
+#define	D1CRTC_STATUS					0x609c
 #define	D1CRTC_UPDATE_LOCK				0x60E8
 #define	D1GRPH_PRIMARY_SURFACE_ADDRESS			0x6110
 #define	D1GRPH_SECONDARY_SURFACE_ADDRESS		0x6118
 
 #define	D2CRTC_CONTROL					0x6880
+#define	D2CRTC_STATUS					0x689c
 #define	D2CRTC_UPDATE_LOCK				0x68E8
 #define	D2GRPH_PRIMARY_SURFACE_ADDRESS			0x6910
 #define	D2GRPH_SECONDARY_SURFACE_ADDRESS		0x6918
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
new file mode 100644
index 0000000..bd2e7aa
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -0,0 +1,767 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+#include "drmP.h"
+#include "radeon.h"
+#include "radeon_drm.h"
+#include "rv770d.h"
+#include "atom.h"
+#include "avivod.h"
+#include "evergreen_reg.h"
+
+static void evergreen_gpu_init(struct radeon_device *rdev);
+void evergreen_fini(struct radeon_device *rdev);
+
+bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
+{
+	bool connected = false;
+	/* XXX */
+	return connected;
+}
+
+void evergreen_hpd_set_polarity(struct radeon_device *rdev,
+				enum radeon_hpd_id hpd)
+{
+	/* XXX */
+}
+
+void evergreen_hpd_init(struct radeon_device *rdev)
+{
+	/* XXX */
+}
+
+
+void evergreen_bandwidth_update(struct radeon_device *rdev)
+{
+	/* XXX */
+}
+
+void evergreen_hpd_fini(struct radeon_device *rdev)
+{
+	/* XXX */
+}
+
+static int evergreen_mc_wait_for_idle(struct radeon_device *rdev)
+{
+	unsigned i;
+	u32 tmp;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		/* read MC_STATUS */
+		tmp = RREG32(SRBM_STATUS) & 0x1F00;
+		if (!tmp)
+			return 0;
+		udelay(1);
+	}
+	return -1;
+}
+
+/*
+ * GART
+ */
+int evergreen_pcie_gart_enable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int r, i;
+
+	if (rdev->gart.table.vram.robj == NULL) {
+		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
+		return -EINVAL;
+	}
+	r = radeon_gart_table_vram_pin(rdev);
+	if (r)
+		return r;
+	radeon_gart_restore(rdev);
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
+	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
+	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
+				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
+			(u32)(rdev->dummy_page.addr >> 12));
+	for (i = 1; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	r600_pcie_gart_tlb_flush(rdev);
+	rdev->gart.ready = true;
+	return 0;
+}
+
+void evergreen_pcie_gart_disable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int i, r;
+
+	/* Disable all tables */
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+	if (rdev->gart.table.vram.robj) {
+		r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
+		if (likely(r == 0)) {
+			radeon_bo_kunmap(rdev->gart.table.vram.robj);
+			radeon_bo_unpin(rdev->gart.table.vram.robj);
+			radeon_bo_unreserve(rdev->gart.table.vram.robj);
+		}
+	}
+}
+
+void evergreen_pcie_gart_fini(struct radeon_device *rdev)
+{
+	evergreen_pcie_gart_disable(rdev);
+	radeon_gart_table_vram_free(rdev);
+	radeon_gart_fini(rdev);
+}
+
+
+void evergreen_agp_enable(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int i;
+
+	/* Setup L2 cache */
+	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
+				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
+				EFFECTIVE_L2_QUEUE_SIZE(7));
+	WREG32(VM_L2_CNTL2, 0);
+	WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2));
+	/* Setup TLB control */
+	tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING |
+		SYSTEM_ACCESS_MODE_NOT_IN_SYS |
+		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU |
+		EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5);
+	WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
+	WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
+	for (i = 0; i < 7; i++)
+		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
+}
+
+static void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save)
+{
+	save->vga_control[0] = RREG32(D1VGA_CONTROL);
+	save->vga_control[1] = RREG32(D2VGA_CONTROL);
+	save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL);
+	save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL);
+	save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL);
+	save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL);
+	save->vga_render_control = RREG32(VGA_RENDER_CONTROL);
+	save->vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+	save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET);
+	save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET);
+	save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET);
+	save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET);
+	save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET);
+	save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET);
+
+	/* Stop all video */
+	WREG32(VGA_RENDER_CONTROL, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+
+	WREG32(D1VGA_CONTROL, 0);
+	WREG32(D2VGA_CONTROL, 0);
+	WREG32(EVERGREEN_D3VGA_CONTROL, 0);
+	WREG32(EVERGREEN_D4VGA_CONTROL, 0);
+	WREG32(EVERGREEN_D5VGA_CONTROL, 0);
+	WREG32(EVERGREEN_D6VGA_CONTROL, 0);
+}
+
+static void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save)
+{
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET,
+	       upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+	WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET,
+	       (u32)rdev->mc.vram_start);
+
+	WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(rdev->mc.vram_start));
+	WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start);
+	/* Unlock host access */
+	WREG32(VGA_HDP_CONTROL, save->vga_hdp_control);
+	mdelay(1);
+	/* Restore video state */
+	WREG32(D1VGA_CONTROL, save->vga_control[0]);
+	WREG32(D2VGA_CONTROL, save->vga_control[1]);
+	WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]);
+	WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]);
+	WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]);
+	WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]);
+	WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+	WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+	WREG32(VGA_RENDER_CONTROL, save->vga_render_control);
+}
+
+static void evergreen_mc_program(struct radeon_device *rdev)
+{
+	struct evergreen_mc_save save;
+	u32 tmp;
+	int i, j;
+
+	/* Initialize HDP */
+	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
+		WREG32((0x2c14 + j), 0x00000000);
+		WREG32((0x2c18 + j), 0x00000000);
+		WREG32((0x2c1c + j), 0x00000000);
+		WREG32((0x2c20 + j), 0x00000000);
+		WREG32((0x2c24 + j), 0x00000000);
+	}
+	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
+
+	evergreen_mc_stop(rdev, &save);
+	if (evergreen_mc_wait_for_idle(rdev)) {
+		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+	}
+	/* Lockout access through VGA aperture*/
+	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
+	/* Update configuration */
+	if (rdev->flags & RADEON_IS_AGP) {
+		if (rdev->mc.vram_start < rdev->mc.gtt_start) {
+			/* VRAM before AGP */
+			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+				rdev->mc.vram_start >> 12);
+			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+				rdev->mc.gtt_end >> 12);
+		} else {
+			/* VRAM after AGP */
+			WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+				rdev->mc.gtt_start >> 12);
+			WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+				rdev->mc.vram_end >> 12);
+		}
+	} else {
+		WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
+			rdev->mc.vram_start >> 12);
+		WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+			rdev->mc.vram_end >> 12);
+	}
+	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0);
+	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
+	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
+	WREG32(MC_VM_FB_LOCATION, tmp);
+	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
+	WREG32(HDP_NONSURFACE_INFO, (2 << 7));
+	WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF);
+	if (rdev->flags & RADEON_IS_AGP) {
+		WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16);
+		WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16);
+		WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22);
+	} else {
+		WREG32(MC_VM_AGP_BASE, 0);
+		WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
+		WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
+	}
+	if (evergreen_mc_wait_for_idle(rdev)) {
+		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+	}
+	evergreen_mc_resume(rdev, &save);
+	/* we need to own VRAM, so turn off the VGA renderer here
+	 * to stop it overwriting our objects */
+	rv515_vga_render_disable(rdev);
+}
+
+#if 0
+/*
+ * CP.
+ */
+static void evergreen_cp_stop(struct radeon_device *rdev)
+{
+	/* XXX */
+}
+
+
+static int evergreen_cp_load_microcode(struct radeon_device *rdev)
+{
+	/* XXX */
+
+	return 0;
+}
+
+
+/*
+ * Core functions
+ */
+static u32 evergreen_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+						  u32 num_backends,
+						  u32 backend_disable_mask)
+{
+	u32 backend_map = 0;
+
+	return backend_map;
+}
+#endif
+
+static void evergreen_gpu_init(struct radeon_device *rdev)
+{
+	/* XXX */
+}
+
+int evergreen_mc_init(struct radeon_device *rdev)
+{
+	fixed20_12 a;
+	u32 tmp;
+	int chansize, numchan;
+
+	/* Get VRAM informations */
+	rdev->mc.vram_is_ddr = true;
+	tmp = RREG32(MC_ARB_RAMCFG);
+	if (tmp & CHANSIZE_OVERRIDE) {
+		chansize = 16;
+	} else if (tmp & CHANSIZE_MASK) {
+		chansize = 64;
+	} else {
+		chansize = 32;
+	}
+	tmp = RREG32(MC_SHARED_CHMAP);
+	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
+	case 0:
+	default:
+		numchan = 1;
+		break;
+	case 1:
+		numchan = 2;
+		break;
+	case 2:
+		numchan = 4;
+		break;
+	case 3:
+		numchan = 8;
+		break;
+	}
+	rdev->mc.vram_width = numchan * chansize;
+	/* Could aper size report 0 ? */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	/* Setup GPU memory space */
+	/* size in MB on evergreen */
+	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	/* FIXME remove this once we support unmappable VRAM */
+	if (rdev->mc.mc_vram_size > rdev->mc.aper_size) {
+		rdev->mc.mc_vram_size = rdev->mc.aper_size;
+		rdev->mc.real_vram_size = rdev->mc.aper_size;
+	}
+	r600_vram_gtt_location(rdev, &rdev->mc);
+	/* FIXME: we should enforce default clock in case GPU is not in
+	 * default setup
+	 */
+	a.full = rfixed_const(100);
+	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
+	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
+	return 0;
+}
+
+int evergreen_gpu_reset(struct radeon_device *rdev)
+{
+	/* FIXME: implement for evergreen */
+	return 0;
+}
+
+static int evergreen_startup(struct radeon_device *rdev)
+{
+#if 0
+	int r;
+
+	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
+		r = r600_init_microcode(rdev);
+		if (r) {
+			DRM_ERROR("Failed to load firmware!\n");
+			return r;
+		}
+	}
+#endif
+	evergreen_mc_program(rdev);
+#if 0
+	if (rdev->flags & RADEON_IS_AGP) {
+		evergreem_agp_enable(rdev);
+	} else {
+		r = evergreen_pcie_gart_enable(rdev);
+		if (r)
+			return r;
+	}
+#endif
+	evergreen_gpu_init(rdev);
+#if 0
+	if (!rdev->r600_blit.shader_obj) {
+		r = r600_blit_init(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failed blitter (%d).\n", r);
+			return r;
+		}
+	}
+
+	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+	if (unlikely(r != 0))
+		return r;
+	r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
+			&rdev->r600_blit.shader_gpu_addr);
+	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+	if (r) {
+		DRM_ERROR("failed to pin blit object %d\n", r);
+		return r;
+	}
+
+	/* Enable IRQ */
+	r = r600_irq_init(rdev);
+	if (r) {
+		DRM_ERROR("radeon: IH init failed (%d).\n", r);
+		radeon_irq_kms_fini(rdev);
+		return r;
+	}
+	r600_irq_set(rdev);
+
+	r = radeon_ring_init(rdev, rdev->cp.ring_size);
+	if (r)
+		return r;
+	r = evergreen_cp_load_microcode(rdev);
+	if (r)
+		return r;
+	r = r600_cp_resume(rdev);
+	if (r)
+		return r;
+	/* write back buffer are not vital so don't worry about failure */
+	r600_wb_enable(rdev);
+#endif
+	return 0;
+}
+
+int evergreen_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
+	 * posting will perform necessary task to bring back GPU into good
+	 * shape.
+	 */
+	/* post card */
+	atom_asic_init(rdev->mode_info.atom_context);
+	/* Initialize clocks */
+	r = radeon_clocks_init(rdev);
+	if (r) {
+		return r;
+	}
+
+	r = evergreen_startup(rdev);
+	if (r) {
+		DRM_ERROR("r600 startup failed on resume\n");
+		return r;
+	}
+#if 0
+	r = r600_ib_test(rdev);
+	if (r) {
+		DRM_ERROR("radeon: failled testing IB (%d).\n", r);
+		return r;
+	}
+#endif
+	return r;
+
+}
+
+int evergreen_suspend(struct radeon_device *rdev)
+{
+#if 0
+	int r;
+
+	/* FIXME: we should wait for ring to be empty */
+	r700_cp_stop(rdev);
+	rdev->cp.ready = false;
+	r600_wb_disable(rdev);
+	evergreen_pcie_gart_disable(rdev);
+	/* unpin shaders bo */
+	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+	if (likely(r == 0)) {
+		radeon_bo_unpin(rdev->r600_blit.shader_obj);
+		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+	}
+#endif
+	return 0;
+}
+
+static bool evergreen_card_posted(struct radeon_device *rdev)
+{
+	u32 reg;
+
+	/* first check CRTCs */
+	reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
+		RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) |
+		RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) |
+		RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) |
+		RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) |
+		RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET);
+	if (reg & EVERGREEN_CRTC_MASTER_EN)
+		return true;
+
+	/* then check MEM_SIZE, in case the crtcs are off */
+	if (RREG32(CONFIG_MEMSIZE))
+		return true;
+
+	return false;
+}
+
+/* Plan is to move initialization in that function and use
+ * helper function so that radeon_device_init pretty much
+ * do nothing more than calling asic specific function. This
+ * should also allow to remove a bunch of callback function
+ * like vram_info.
+ */
+int evergreen_init(struct radeon_device *rdev)
+{
+	int r;
+
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
+	/* This don't do much */
+	r = radeon_gem_init(rdev);
+	if (r)
+		return r;
+	/* Read BIOS */
+	if (!radeon_get_bios(rdev)) {
+		if (ASIC_IS_AVIVO(rdev))
+			return -EINVAL;
+	}
+	/* Must be an ATOMBIOS */
+	if (!rdev->is_atom_bios) {
+		dev_err(rdev->dev, "Expecting atombios for R600 GPU\n");
+		return -EINVAL;
+	}
+	r = radeon_atombios_init(rdev);
+	if (r)
+		return r;
+	/* Post card if necessary */
+	if (!evergreen_card_posted(rdev)) {
+		if (!rdev->bios) {
+			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
+			return -EINVAL;
+		}
+		DRM_INFO("GPU not posted. posting now...\n");
+		atom_asic_init(rdev->mode_info.atom_context);
+	}
+	/* Initialize scratch registers */
+	r600_scratch_init(rdev);
+	/* Initialize surface registers */
+	radeon_surface_init(rdev);
+	/* Initialize clocks */
+	radeon_get_clock_info(rdev->ddev);
+	r = radeon_clocks_init(rdev);
+	if (r)
+		return r;
+	/* Initialize power management */
+	radeon_pm_init(rdev);
+	/* Fence driver */
+	r = radeon_fence_driver_init(rdev);
+	if (r)
+		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r)
+			radeon_agp_disable(rdev);
+	}
+	/* initialize memory controller */
+	r = evergreen_mc_init(rdev);
+	if (r)
+		return r;
+	/* Memory manager */
+	r = radeon_bo_init(rdev);
+	if (r)
+		return r;
+#if 0
+	r = radeon_irq_kms_init(rdev);
+	if (r)
+		return r;
+
+	rdev->cp.ring_obj = NULL;
+	r600_ring_init(rdev, 1024 * 1024);
+
+	rdev->ih.ring_obj = NULL;
+	r600_ih_ring_init(rdev, 64 * 1024);
+
+	r = r600_pcie_gart_init(rdev);
+	if (r)
+		return r;
+#endif
+	rdev->accel_working = false;
+	r = evergreen_startup(rdev);
+	if (r) {
+		evergreen_suspend(rdev);
+		/*r600_wb_fini(rdev);*/
+		/*radeon_ring_fini(rdev);*/
+		/*evergreen_pcie_gart_fini(rdev);*/
+		rdev->accel_working = false;
+	}
+	if (rdev->accel_working) {
+		r = radeon_ib_pool_init(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r);
+			rdev->accel_working = false;
+		}
+		r = r600_ib_test(rdev);
+		if (r) {
+			DRM_ERROR("radeon: failed testing IB (%d).\n", r);
+			rdev->accel_working = false;
+		}
+	}
+	return 0;
+}
+
+void evergreen_fini(struct radeon_device *rdev)
+{
+	evergreen_suspend(rdev);
+#if 0
+	r600_blit_fini(rdev);
+	r600_irq_fini(rdev);
+	radeon_irq_kms_fini(rdev);
+	radeon_ring_fini(rdev);
+	r600_wb_fini(rdev);
+	evergreen_pcie_gart_fini(rdev);
+#endif
+	radeon_gem_fini(rdev);
+	radeon_fence_driver_fini(rdev);
+	radeon_clocks_fini(rdev);
+	radeon_agp_fini(rdev);
+	radeon_bo_fini(rdev);
+	radeon_atombios_fini(rdev);
+	kfree(rdev->bios);
+	rdev->bios = NULL;
+	radeon_dummy_page_fini(rdev);
+}
diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h
new file mode 100644
index 0000000..f7c7c96
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_reg.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#ifndef __EVERGREEN_REG_H__
+#define __EVERGREEN_REG_H__
+
+/* evergreen */
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS               0x310
+#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH          0x324
+#define EVERGREEN_D3VGA_CONTROL                         0x3e0
+#define EVERGREEN_D4VGA_CONTROL                         0x3e4
+#define EVERGREEN_D5VGA_CONTROL                         0x3e8
+#define EVERGREEN_D6VGA_CONTROL                         0x3ec
+
+#define EVERGREEN_P1PLL_SS_CNTL                         0x414
+#define EVERGREEN_P2PLL_SS_CNTL                         0x454
+#       define EVERGREEN_PxPLL_SS_EN                    (1 << 12)
+/* GRPH blocks at 0x6800, 0x7400, 0x10000, 0x10c00, 0x11800, 0x12400 */
+#define EVERGREEN_GRPH_ENABLE                           0x6800
+#define EVERGREEN_GRPH_CONTROL                          0x6804
+#       define EVERGREEN_GRPH_DEPTH(x)                  (((x) & 0x3) << 0)
+#       define EVERGREEN_GRPH_DEPTH_8BPP                0
+#       define EVERGREEN_GRPH_DEPTH_16BPP               1
+#       define EVERGREEN_GRPH_DEPTH_32BPP               2
+#       define EVERGREEN_GRPH_FORMAT(x)                 (((x) & 0x7) << 8)
+/* 8 BPP */
+#       define EVERGREEN_GRPH_FORMAT_INDEXED            0
+/* 16 BPP */
+#       define EVERGREEN_GRPH_FORMAT_ARGB1555           0
+#       define EVERGREEN_GRPH_FORMAT_ARGB565            1
+#       define EVERGREEN_GRPH_FORMAT_ARGB4444           2
+#       define EVERGREEN_GRPH_FORMAT_AI88               3
+#       define EVERGREEN_GRPH_FORMAT_MONO16             4
+#       define EVERGREEN_GRPH_FORMAT_BGRA5551           5
+/* 32 BPP */
+#       define EVERGREEN_GRPH_FORMAT_ARGB8888           0
+#       define EVERGREEN_GRPH_FORMAT_ARGB2101010        1
+#       define EVERGREEN_GRPH_FORMAT_32BPP_DIG          2
+#       define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010     3
+#       define EVERGREEN_GRPH_FORMAT_BGRA1010102        4
+#       define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102     5
+#       define EVERGREEN_GRPH_FORMAT_RGB111110          6
+#       define EVERGREEN_GRPH_FORMAT_BGR101111          7
+#define EVERGREEN_GRPH_SWAP_CONTROL                     0x680c
+#       define EVERGREEN_GRPH_ENDIAN_SWAP(x)            (((x) & 0x3) << 0)
+#       define EVERGREEN_GRPH_ENDIAN_NONE               0
+#       define EVERGREEN_GRPH_ENDIAN_8IN16              1
+#       define EVERGREEN_GRPH_ENDIAN_8IN32              2
+#       define EVERGREEN_GRPH_ENDIAN_8IN64              3
+#       define EVERGREEN_GRPH_RED_CROSSBAR(x)           (((x) & 0x3) << 4)
+#       define EVERGREEN_GRPH_RED_SEL_R                 0
+#       define EVERGREEN_GRPH_RED_SEL_G                 1
+#       define EVERGREEN_GRPH_RED_SEL_B                 2
+#       define EVERGREEN_GRPH_RED_SEL_A                 3
+#       define EVERGREEN_GRPH_GREEN_CROSSBAR(x)         (((x) & 0x3) << 6)
+#       define EVERGREEN_GRPH_GREEN_SEL_G               0
+#       define EVERGREEN_GRPH_GREEN_SEL_B               1
+#       define EVERGREEN_GRPH_GREEN_SEL_A               2
+#       define EVERGREEN_GRPH_GREEN_SEL_R               3
+#       define EVERGREEN_GRPH_BLUE_CROSSBAR(x)          (((x) & 0x3) << 8)
+#       define EVERGREEN_GRPH_BLUE_SEL_B                0
+#       define EVERGREEN_GRPH_BLUE_SEL_A                1
+#       define EVERGREEN_GRPH_BLUE_SEL_R                2
+#       define EVERGREEN_GRPH_BLUE_SEL_G                3
+#       define EVERGREEN_GRPH_ALPHA_CROSSBAR(x)         (((x) & 0x3) << 10)
+#       define EVERGREEN_GRPH_ALPHA_SEL_A               0
+#       define EVERGREEN_GRPH_ALPHA_SEL_R               1
+#       define EVERGREEN_GRPH_ALPHA_SEL_G               2
+#       define EVERGREEN_GRPH_ALPHA_SEL_B               3
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS          0x6810
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS        0x6814
+#       define EVERGREEN_GRPH_DFQ_ENABLE                (1 << 0)
+#       define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK      0xffffff00
+#define EVERGREEN_GRPH_PITCH                            0x6818
+#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH     0x681c
+#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH   0x6820
+#define EVERGREEN_GRPH_SURFACE_OFFSET_X                 0x6824
+#define EVERGREEN_GRPH_SURFACE_OFFSET_Y                 0x6828
+#define EVERGREEN_GRPH_X_START                          0x682c
+#define EVERGREEN_GRPH_Y_START                          0x6830
+#define EVERGREEN_GRPH_X_END                            0x6834
+#define EVERGREEN_GRPH_Y_END                            0x6838
+
+/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
+#define EVERGREEN_CUR_CONTROL                           0x6998
+#       define EVERGREEN_CURSOR_EN                      (1 << 0)
+#       define EVERGREEN_CURSOR_MODE(x)                 (((x) & 0x3) << 8)
+#       define EVERGREEN_CURSOR_MONO                    0
+#       define EVERGREEN_CURSOR_24_1                    1
+#       define EVERGREEN_CURSOR_24_8_PRE_MULT           2
+#       define EVERGREEN_CURSOR_24_8_UNPRE_MULT         3
+#       define EVERGREEN_CURSOR_2X_MAGNIFY              (1 << 16)
+#       define EVERGREEN_CURSOR_FORCE_MC_ON             (1 << 20)
+#       define EVERGREEN_CURSOR_URGENT_CONTROL(x)       (((x) & 0x7) << 24)
+#       define EVERGREEN_CURSOR_URGENT_ALWAYS           0
+#       define EVERGREEN_CURSOR_URGENT_1_8              1
+#       define EVERGREEN_CURSOR_URGENT_1_4              2
+#       define EVERGREEN_CURSOR_URGENT_3_8              3
+#       define EVERGREEN_CURSOR_URGENT_1_2              4
+#define EVERGREEN_CUR_SURFACE_ADDRESS                   0x699c
+#       define EVERGREEN_CUR_SURFACE_ADDRESS_MASK       0xfffff000
+#define EVERGREEN_CUR_SIZE                              0x69a0
+#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH              0x69a4
+#define EVERGREEN_CUR_POSITION                          0x69a8
+#define EVERGREEN_CUR_HOT_SPOT                          0x69ac
+#define EVERGREEN_CUR_COLOR1                            0x69b0
+#define EVERGREEN_CUR_COLOR2                            0x69b4
+#define EVERGREEN_CUR_UPDATE                            0x69b8
+#       define EVERGREEN_CURSOR_UPDATE_PENDING          (1 << 0)
+#       define EVERGREEN_CURSOR_UPDATE_TAKEN            (1 << 1)
+#       define EVERGREEN_CURSOR_UPDATE_LOCK             (1 << 16)
+#       define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
+
+/* LUT blocks at 0x69e0, 0x75e0, 0x101e0, 0x10de0, 0x119e0, 0x125e0 */
+#define EVERGREEN_DC_LUT_RW_MODE                        0x69e0
+#define EVERGREEN_DC_LUT_RW_INDEX                       0x69e4
+#define EVERGREEN_DC_LUT_SEQ_COLOR                      0x69e8
+#define EVERGREEN_DC_LUT_PWL_DATA                       0x69ec
+#define EVERGREEN_DC_LUT_30_COLOR                       0x69f0
+#define EVERGREEN_DC_LUT_VGA_ACCESS_ENABLE              0x69f4
+#define EVERGREEN_DC_LUT_WRITE_EN_MASK                  0x69f8
+#define EVERGREEN_DC_LUT_AUTOFILL                       0x69fc
+#define EVERGREEN_DC_LUT_CONTROL                        0x6a00
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE              0x6a04
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN             0x6a08
+#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED               0x6a0c
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE              0x6a10
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN             0x6a14
+#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED               0x6a18
+
+#define EVERGREEN_DATA_FORMAT                           0x6b00
+#       define EVERGREEN_INTERLEAVE_EN                  (1 << 0)
+#define EVERGREEN_DESKTOP_HEIGHT                        0x6b04
+
+#define EVERGREEN_VIEWPORT_START                        0x6d70
+#define EVERGREEN_VIEWPORT_SIZE                         0x6d74
+
+/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
+#define EVERGREEN_CRTC0_REGISTER_OFFSET                 (0x6df0 - 0x6df0)
+#define EVERGREEN_CRTC1_REGISTER_OFFSET                 (0x79f0 - 0x6df0)
+#define EVERGREEN_CRTC2_REGISTER_OFFSET                 (0x105f0 - 0x6df0)
+#define EVERGREEN_CRTC3_REGISTER_OFFSET                 (0x111f0 - 0x6df0)
+#define EVERGREEN_CRTC4_REGISTER_OFFSET                 (0x11df0 - 0x6df0)
+#define EVERGREEN_CRTC5_REGISTER_OFFSET                 (0x129f0 - 0x6df0)
+
+/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
+#define EVERGREEN_CRTC_CONTROL                          0x6e70
+#       define EVERGREEN_CRTC_MASTER_EN                 (1 << 0)
+#define EVERGREEN_CRTC_UPDATE_LOCK                      0x6ed4
+
+#define EVERGREEN_DC_GPIO_HPD_MASK                      0x64b0
+#define EVERGREEN_DC_GPIO_HPD_A                         0x64b4
+#define EVERGREEN_DC_GPIO_HPD_EN                        0x64b8
+#define EVERGREEN_DC_GPIO_HPD_Y                         0x64bc
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index c0d4650..91eb762 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -197,13 +197,13 @@
 {
 	uint32_t tmp;
 
+	radeon_gart_restore(rdev);
 	/* discard memory request outside of configured range */
 	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
 	WREG32(RADEON_AIC_CNTL, tmp);
 	/* set address range for PCI address translate */
-	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	WREG32(RADEON_AIC_HI_ADDR, tmp);
+	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
+	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
 	/* set PCI GART page-table base address */
 	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
 	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
@@ -312,9 +312,11 @@
 		/* Vertical blank interrupts */
 		if (status & RADEON_CRTC_VBLANK_STAT) {
 			drm_handle_vblank(rdev->ddev, 0);
+			wake_up(&rdev->irq.vblank_queue);
 		}
 		if (status & RADEON_CRTC2_VBLANK_STAT) {
 			drm_handle_vblank(rdev->ddev, 1);
+			wake_up(&rdev->irq.vblank_queue);
 		}
 		if (status & RADEON_FP_DETECT_STAT) {
 			queue_hotplug = true;
@@ -366,8 +368,8 @@
 	radeon_ring_write(rdev, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
 	radeon_ring_write(rdev, RADEON_RB3D_ZC_FLUSH_ALL);
 	/* Wait until IDLE & CLEAN */
-	radeon_ring_write(rdev, PACKET0(0x1720, 0));
-	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
 	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 	radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
 				RADEON_HDP_READ_BUFFER_INVALIDATE);
@@ -1701,7 +1703,7 @@
 	}
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(RADEON_RBBM_STATUS);
-		if (!(tmp & (1 << 31))) {
+		if (!(tmp & RADEON_RBBM_ACTIVE)) {
 			return 0;
 		}
 		DRM_UDELAY(1);
@@ -1716,8 +1718,8 @@
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		/* read MC_STATUS */
-		tmp = RREG32(0x0150);
-		if (tmp & (1 << 2)) {
+		tmp = RREG32(RADEON_MC_STATUS);
+		if (tmp & RADEON_MC_IDLE) {
 			return 0;
 		}
 		DRM_UDELAY(1);
@@ -1790,7 +1792,7 @@
 	}
 	/* Check if GPU is idle */
 	status = RREG32(RADEON_RBBM_STATUS);
-	if (status & (1 << 31)) {
+	if (status & RADEON_RBBM_ACTIVE) {
 		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
 		return -1;
 	}
@@ -1800,6 +1802,9 @@
 
 void r100_set_common_regs(struct radeon_device *rdev)
 {
+	struct drm_device *dev = rdev->ddev;
+	bool force_dac2 = false;
+
 	/* set these so they don't interfere with anything */
 	WREG32(RADEON_OV0_SCALE_CNTL, 0);
 	WREG32(RADEON_SUBPIC_CNTL, 0);
@@ -1808,6 +1813,68 @@
 	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
 	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
 	WREG32(RADEON_CAP1_TRIG_CNTL, 0);
+
+	/* always set up dac2 on rn50 and some rv100 as lots
+	 * of servers seem to wire it up to a VGA port but
+	 * don't report it in the bios connector
+	 * table.
+	 */
+	switch (dev->pdev->device) {
+		/* RN50 */
+	case 0x515e:
+	case 0x5969:
+		force_dac2 = true;
+		break;
+		/* RV100*/
+	case 0x5159:
+	case 0x515a:
+		/* DELL triple head servers */
+		if ((dev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
+		    ((dev->pdev->subsystem_device == 0x016c) ||
+		     (dev->pdev->subsystem_device == 0x016d) ||
+		     (dev->pdev->subsystem_device == 0x016e) ||
+		     (dev->pdev->subsystem_device == 0x016f) ||
+		     (dev->pdev->subsystem_device == 0x0170) ||
+		     (dev->pdev->subsystem_device == 0x017d) ||
+		     (dev->pdev->subsystem_device == 0x017e) ||
+		     (dev->pdev->subsystem_device == 0x0183) ||
+		     (dev->pdev->subsystem_device == 0x018a) ||
+		     (dev->pdev->subsystem_device == 0x019a)))
+			force_dac2 = true;
+		break;
+	}
+
+	if (force_dac2) {
+		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
+		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
+		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
+
+		/* For CRT on DAC2, don't turn it on if BIOS didn't
+		   enable it, even it's detected.
+		*/
+
+		/* force it to crtc0 */
+		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
+		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
+		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
+
+		/* set up the TV DAC */
+		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
+				 RADEON_TV_DAC_STD_MASK |
+				 RADEON_TV_DAC_RDACPD |
+				 RADEON_TV_DAC_GDACPD |
+				 RADEON_TV_DAC_BDACPD |
+				 RADEON_TV_DAC_BGADJ_MASK |
+				 RADEON_TV_DAC_DACADJ_MASK);
+		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
+				RADEON_TV_DAC_NHOLD |
+				RADEON_TV_DAC_STD_PS2 |
+				(0x58 << 16));
+
+		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
+		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
+	}
 }
 
 /*
@@ -1889,17 +1956,20 @@
 void r100_vram_init_sizes(struct radeon_device *rdev)
 {
 	u64 config_aper_size;
-	u32 accessible;
 
+	/* work out accessible VRAM */
+	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
+	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
+	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
+	/* FIXME we don't use the second aperture yet when we could use it */
+	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
+		rdev->mc.visible_vram_size = rdev->mc.aper_size;
 	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
-
 	if (rdev->flags & RADEON_IS_IGP) {
 		uint32_t tom;
 		/* read NB_TOM to get the amount of ram stolen for the GPU */
 		tom = RREG32(RADEON_NB_TOM);
 		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
-		/* for IGPs we need to keep VRAM where it was put by the BIOS */
-		rdev->mc.vram_location = (tom & 0xffff) << 16;
 		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
 		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 	} else {
@@ -1911,30 +1981,19 @@
 			rdev->mc.real_vram_size = 8192 * 1024;
 			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
 		}
-		/* let driver place VRAM */
-		rdev->mc.vram_location = 0xFFFFFFFFUL;
-		 /* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
-		  * Novell bug 204882 + along with lots of ubuntu ones */
+		/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM - 
+		 * Novell bug 204882 + along with lots of ubuntu ones
+		 */
 		if (config_aper_size > rdev->mc.real_vram_size)
 			rdev->mc.mc_vram_size = config_aper_size;
 		else
 			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
 	}
-
-	/* work out accessible VRAM */
-	accessible = r100_get_accessible_vram(rdev);
-
-	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
-	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-
-	if (accessible > rdev->mc.aper_size)
-		accessible = rdev->mc.aper_size;
-
-	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
+	/* FIXME remove this once we support unmappable VRAM */
+	if (rdev->mc.mc_vram_size > rdev->mc.aper_size) {
 		rdev->mc.mc_vram_size = rdev->mc.aper_size;
-
-	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
 		rdev->mc.real_vram_size = rdev->mc.aper_size;
+	}
 }
 
 void r100_vga_set_state(struct radeon_device *rdev, bool state)
@@ -1951,11 +2010,18 @@
 	WREG32(RADEON_CONFIG_CNTL, temp);
 }
 
-void r100_vram_info(struct radeon_device *rdev)
+void r100_mc_init(struct radeon_device *rdev)
 {
-	r100_vram_get_type(rdev);
+	u64 base;
 
+	r100_vram_get_type(rdev);
 	r100_vram_init_sizes(rdev);
+	base = rdev->mc.aper_base;
+	if (rdev->flags & RADEON_IS_IGP)
+		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
+	radeon_vram_location(rdev, &rdev->mc, base);
+	if (!(rdev->flags & RADEON_IS_AGP))
+		radeon_gtt_location(rdev, &rdev->mc);
 }
 
 
@@ -3226,10 +3292,9 @@
 void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
 {
 	/* Update base address for crtc */
-	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_location);
+	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
 	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
-		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR,
-				rdev->mc.vram_location);
+		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
 	}
 	/* Restore CRTC registers */
 	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
@@ -3390,32 +3455,6 @@
 	rdev->bios = NULL;
 }
 
-int r100_mc_init(struct radeon_device *rdev)
-{
-	int r;
-	u32 tmp;
-
-	/* Setup GPU memory space */
-	rdev->mc.vram_location = 0xFFFFFFFFUL;
-	rdev->mc.gtt_location = 0xFFFFFFFFUL;
-	if (rdev->flags & RADEON_IS_IGP) {
-		tmp = G_00015C_MC_FB_START(RREG32(R_00015C_NB_TOM));
-		rdev->mc.vram_location = tmp << 16;
-	}
-	if (rdev->flags & RADEON_IS_AGP) {
-		r = radeon_agp_init(rdev);
-		if (r) {
-			radeon_agp_disable(rdev);
-		} else {
-			rdev->mc.gtt_location = rdev->mc.agp_base;
-		}
-	}
-	r = radeon_mc_setup(rdev);
-	if (r)
-		return r;
-	return 0;
-}
-
 int r100_init(struct radeon_device *rdev)
 {
 	int r;
@@ -3458,12 +3497,15 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	r100_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = r100_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			radeon_agp_disable(rdev);
+		}
+	}
+	/* initialize VRAM */
+	r100_mc_init(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
 	if (r)
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index ff1e0cd..1146c99 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -31,6 +31,7 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
+#include "r100d.h"
 #include "r200_reg_safe.h"
 
 #include "r100_track.h"
@@ -79,6 +80,51 @@
 	return vtx_size;
 }
 
+int r200_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset,
+		  uint64_t dst_offset,
+		  unsigned num_pages,
+		  struct radeon_fence *fence)
+{
+	uint32_t size;
+	uint32_t cur_size;
+	int i, num_loops;
+	int r = 0;
+
+	/* radeon pitch is /64 */
+	size = num_pages << PAGE_SHIFT;
+	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
+	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+	/* Must wait for 2D idle & clean before DMA or hangs might happen */
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, (1 << 16));
+	for (i = 0; i < num_loops; i++) {
+		cur_size = size;
+		if (cur_size > 0x1FFFFF) {
+			cur_size = 0x1FFFFF;
+		}
+		size -= cur_size;
+		radeon_ring_write(rdev, PACKET0(0x720, 2));
+		radeon_ring_write(rdev, src_offset);
+		radeon_ring_write(rdev, dst_offset);
+		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
+		src_offset += cur_size;
+		dst_offset += cur_size;
+	}
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
+	if (fence) {
+		r = radeon_fence_emit(rdev, fence);
+	}
+	radeon_ring_unlock_commit(rdev);
+	return r;
+}
+
+
 static int r200_get_vtx_size_1(uint32_t vtx_fmt_1)
 {
 	int vtx_size, i, tex_size;
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 43b55a0..4cef90c 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -117,18 +117,19 @@
 	r = radeon_gart_table_vram_pin(rdev);
 	if (r)
 		return r;
+	radeon_gart_restore(rdev);
 	/* discard memory request outside of configured range */
 	tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
-	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_location);
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - RADEON_GPU_PAGE_SIZE;
+	WREG32_PCIE(RADEON_PCIE_TX_GART_START_LO, rdev->mc.gtt_start);
+	tmp = rdev->mc.gtt_end & ~RADEON_GPU_PAGE_MASK;
 	WREG32_PCIE(RADEON_PCIE_TX_GART_END_LO, tmp);
 	WREG32_PCIE(RADEON_PCIE_TX_GART_START_HI, 0);
 	WREG32_PCIE(RADEON_PCIE_TX_GART_END_HI, 0);
 	table_addr = rdev->gart.table_addr;
 	WREG32_PCIE(RADEON_PCIE_TX_GART_BASE, table_addr);
 	/* FIXME: setup default page */
-	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_location);
+	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_LO, rdev->mc.vram_start);
 	WREG32_PCIE(RADEON_PCIE_TX_DISCARD_RD_ADDR_HI, 0);
 	/* Clear error */
 	WREG32_PCIE(0x18, 0);
@@ -174,18 +175,20 @@
 	/* Who ever call radeon_fence_emit should call ring_lock and ask
 	 * for enough space (today caller are ib schedule and buffer move) */
 	/* Write SC register so SC & US assert idle */
-	radeon_ring_write(rdev, PACKET0(0x43E0, 0));
+	radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_TL, 0));
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, PACKET0(0x43E4, 0));
+	radeon_ring_write(rdev, PACKET0(R300_RE_SCISSORS_BR, 0));
 	radeon_ring_write(rdev, 0);
 	/* Flush 3D cache */
-	radeon_ring_write(rdev, PACKET0(0x4E4C, 0));
-	radeon_ring_write(rdev, (2 << 0));
-	radeon_ring_write(rdev, PACKET0(0x4F18, 0));
-	radeon_ring_write(rdev, (1 << 0));
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_RB3D_DC_FLUSH);
+	radeon_ring_write(rdev, PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
+	radeon_ring_write(rdev, R300_ZC_FLUSH);
 	/* Wait until IDLE & CLEAN */
-	radeon_ring_write(rdev, PACKET0(0x1720, 0));
-	radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
+	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
+	radeon_ring_write(rdev, (RADEON_WAIT_3D_IDLECLEAN |
+				 RADEON_WAIT_2D_IDLECLEAN |
+				 RADEON_WAIT_DMA_GUI_IDLE));
 	radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 	radeon_ring_write(rdev, rdev->config.r300.hdp_cntl |
 				RADEON_HDP_READ_BUFFER_INVALIDATE);
@@ -198,50 +201,6 @@
 	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
 }
 
-int r300_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset,
-		  uint64_t dst_offset,
-		  unsigned num_pages,
-		  struct radeon_fence *fence)
-{
-	uint32_t size;
-	uint32_t cur_size;
-	int i, num_loops;
-	int r = 0;
-
-	/* radeon pitch is /64 */
-	size = num_pages << PAGE_SHIFT;
-	num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
-	r = radeon_ring_lock(rdev, num_loops * 4 + 64);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-	/* Must wait for 2D idle & clean before DMA or hangs might happen */
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0 ));
-	radeon_ring_write(rdev, (1 << 16));
-	for (i = 0; i < num_loops; i++) {
-		cur_size = size;
-		if (cur_size > 0x1FFFFF) {
-			cur_size = 0x1FFFFF;
-		}
-		size -= cur_size;
-		radeon_ring_write(rdev, PACKET0(0x720, 2));
-		radeon_ring_write(rdev, src_offset);
-		radeon_ring_write(rdev, dst_offset);
-		radeon_ring_write(rdev, cur_size | (1 << 31) | (1 << 30));
-		src_offset += cur_size;
-		dst_offset += cur_size;
-	}
-	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
-	radeon_ring_write(rdev, RADEON_WAIT_DMA_GUI_IDLE);
-	if (fence) {
-		r = radeon_fence_emit(rdev, fence);
-	}
-	radeon_ring_unlock_commit(rdev);
-	return r;
-}
-
 void r300_ring_start(struct radeon_device *rdev)
 {
 	unsigned gb_tile_config;
@@ -281,8 +240,8 @@
 	radeon_ring_write(rdev,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(rdev, PACKET0(0x170C, 0));
-	radeon_ring_write(rdev, 1 << 31);
+	radeon_ring_write(rdev, PACKET0(R300_DST_PIPE_CONFIG, 0));
+	radeon_ring_write(rdev, R300_PIPE_AUTO_CONFIG);
 	radeon_ring_write(rdev, PACKET0(R300_GB_SELECT, 0));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, PACKET0(R300_GB_ENABLE, 0));
@@ -349,8 +308,8 @@
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		/* read MC_STATUS */
-		tmp = RREG32(0x0150);
-		if (tmp & (1 << 4)) {
+		tmp = RREG32(RADEON_MC_STATUS);
+		if (tmp & R300_MC_IDLE) {
 			return 0;
 		}
 		DRM_UDELAY(1);
@@ -395,8 +354,8 @@
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	tmp = RREG32(0x170C);
-	WREG32(0x170C, tmp | (1 << 31));
+	tmp = RREG32(R300_DST_PIPE_CONFIG);
+	WREG32(R300_DST_PIPE_CONFIG, tmp | R300_PIPE_AUTO_CONFIG);
 
 	WREG32(R300_RB2D_DSTCACHE_MODE,
 	       R300_DC_AUTOFLUSH_ENABLE |
@@ -437,8 +396,8 @@
 			/* GA still busy soft reset it */
 			WREG32(0x429C, 0x200);
 			WREG32(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-			WREG32(0x43E0, 0);
-			WREG32(0x43E4, 0);
+			WREG32(R300_RE_SCISSORS_TL, 0);
+			WREG32(R300_RE_SCISSORS_BR, 0);
 			WREG32(0x24AC, 0);
 		}
 		/* Wait to prevent race in RBBM_STATUS */
@@ -488,7 +447,7 @@
 	}
 	/* Check if GPU is idle */
 	status = RREG32(RADEON_RBBM_STATUS);
-	if (status & (1 << 31)) {
+	if (status & RADEON_RBBM_ACTIVE) {
 		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
 		return -1;
 	}
@@ -500,13 +459,13 @@
 /*
  * r300,r350,rv350,rv380 VRAM info
  */
-void r300_vram_info(struct radeon_device *rdev)
+void r300_mc_init(struct radeon_device *rdev)
 {
-	uint32_t tmp;
+	u64 base;
+	u32 tmp;
 
 	/* DDR for all card after R300 & IGP */
 	rdev->mc.vram_is_ddr = true;
-
 	tmp = RREG32(RADEON_MEM_CNTL);
 	tmp &= R300_MEM_NUM_CHANNELS_MASK;
 	switch (tmp) {
@@ -515,8 +474,13 @@
 	case 2: rdev->mc.vram_width = 256; break;
 	default:  rdev->mc.vram_width = 128; break;
 	}
-
 	r100_vram_init_sizes(rdev);
+	base = rdev->mc.aper_base;
+	if (rdev->flags & RADEON_IS_IGP)
+		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
+	radeon_vram_location(rdev, &rdev->mc, base);
+	if (!(rdev->flags & RADEON_IS_AGP))
+		radeon_gtt_location(rdev, &rdev->mc);
 }
 
 void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes)
@@ -578,6 +542,40 @@
 
 }
 
+int rv370_get_pcie_lanes(struct radeon_device *rdev)
+{
+	u32 link_width_cntl;
+
+	if (rdev->flags & RADEON_IS_IGP)
+		return 0;
+
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return 0;
+
+	/* FIXME wait for idle */
+
+	if (rdev->family < CHIP_R600)
+		link_width_cntl = RREG32_PCIE(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+	else
+		link_width_cntl = RREG32_PCIE_P(RADEON_PCIE_LC_LINK_WIDTH_CNTL);
+
+	switch ((link_width_cntl & RADEON_PCIE_LC_LINK_WIDTH_RD_MASK) >> RADEON_PCIE_LC_LINK_WIDTH_RD_SHIFT) {
+	case RADEON_PCIE_LC_LINK_WIDTH_X0:
+		return 0;
+	case RADEON_PCIE_LC_LINK_WIDTH_X1:
+		return 1;
+	case RADEON_PCIE_LC_LINK_WIDTH_X2:
+		return 2;
+	case RADEON_PCIE_LC_LINK_WIDTH_X4:
+		return 4;
+	case RADEON_PCIE_LC_LINK_WIDTH_X8:
+		return 8;
+	case RADEON_PCIE_LC_LINK_WIDTH_X16:
+	default:
+		return 16;
+	}
+}
+
 #if defined(CONFIG_DEBUG_FS)
 static int rv370_debugfs_pcie_gart_info(struct seq_file *m, void *data)
 {
@@ -707,6 +705,8 @@
 			tile_flags |= R300_TXO_MACRO_TILE;
 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
 			tile_flags |= R300_TXO_MICRO_TILE;
+		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			tile_flags |= R300_TXO_MICRO_TILE_SQUARE;
 
 		tmp = idx_value + ((u32)reloc->lobj.gpu_offset);
 		tmp |= tile_flags;
@@ -757,6 +757,8 @@
 			tile_flags |= R300_COLOR_TILE_ENABLE;
 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
 			tile_flags |= R300_COLOR_MICROTILE_ENABLE;
+		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE;
 
 		tmp = idx_value & ~(0x7 << 16);
 		tmp |= tile_flags;
@@ -828,7 +830,9 @@
 		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
 			tile_flags |= R300_DEPTHMACROTILE_ENABLE;
 		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
-			tile_flags |= R300_DEPTHMICROTILE_TILED;;
+			tile_flags |= R300_DEPTHMICROTILE_TILED;
+		else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE;
 
 		tmp = idx_value & ~(0x7 << 16);
 		tmp |= tile_flags;
@@ -1387,12 +1391,15 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	r300_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = r420_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			radeon_agp_disable(rdev);
+		}
+	}
+	/* initialize memory controller */
+	r300_mc_init(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
 	if (r)
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index 34bffa0..ea46d55 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -33,6 +33,7 @@
 
 #include "drmP.h"
 #include "drm.h"
+#include "drm_buffer.h"
 #include "radeon_drm.h"
 #include "radeon_drv.h"
 #include "r300_reg.h"
@@ -299,46 +300,42 @@
 	int reg;
 	int sz;
 	int i;
-	int values[64];
+	u32 *value;
 	RING_LOCALS;
 
 	sz = header.packet0.count;
 	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 
 	if ((sz > 64) || (sz < 0)) {
-		DRM_ERROR
-		    ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
-		     reg, sz);
+		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
+			 reg, sz);
 		return -EINVAL;
 	}
+
 	for (i = 0; i < sz; i++) {
-		values[i] = ((int *)cmdbuf->buf)[i];
 		switch (r300_reg_flags[(reg >> 2) + i]) {
 		case MARK_SAFE:
 			break;
 		case MARK_CHECK_OFFSET:
-			if (!radeon_check_offset(dev_priv, (u32) values[i])) {
-				DRM_ERROR
-				    ("Offset failed range check (reg=%04x sz=%d)\n",
-				     reg, sz);
+			value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+			if (!radeon_check_offset(dev_priv, *value)) {
+				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
+					 reg, sz);
 				return -EINVAL;
 			}
 			break;
 		default:
 			DRM_ERROR("Register %04x failed check as flag=%02x\n",
-				  reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
+				reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
 			return -EINVAL;
 		}
 	}
 
 	BEGIN_RING(1 + sz);
 	OUT_RING(CP_PACKET0(reg, sz - 1));
-	OUT_RING_TABLE(values, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * 4;
-	cmdbuf->bufsz -= sz * 4;
-
 	return 0;
 }
 
@@ -362,7 +359,7 @@
 	if (!sz)
 		return 0;
 
-	if (sz * 4 > cmdbuf->bufsz)
+	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 		return -EINVAL;
 
 	if (reg + sz * 4 >= 0x10000) {
@@ -380,12 +377,9 @@
 
 	BEGIN_RING(1 + sz);
 	OUT_RING(CP_PACKET0(reg, sz - 1));
-	OUT_RING_TABLE((int *)cmdbuf->buf, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * 4;
-	cmdbuf->bufsz -= sz * 4;
-
 	return 0;
 }
 
@@ -407,7 +401,7 @@
 
 	if (!sz)
 		return 0;
-	if (sz * 16 > cmdbuf->bufsz)
+	if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
 		return -EINVAL;
 
 	/* VAP is very sensitive so we purge cache before we program it
@@ -426,7 +420,7 @@
 	BEGIN_RING(3 + sz * 4);
 	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
 	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
-	OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
 	ADVANCE_RING();
 
 	BEGIN_RING(2);
@@ -434,9 +428,6 @@
 	OUT_RING(0);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * 16;
-	cmdbuf->bufsz -= sz * 16;
-
 	return 0;
 }
 
@@ -449,14 +440,14 @@
 {
 	RING_LOCALS;
 
-	if (8 * 4 > cmdbuf->bufsz)
+	if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 		return -EINVAL;
 
 	BEGIN_RING(10);
 	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
 	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
 		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-	OUT_RING_TABLE((int *)cmdbuf->buf, 8);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
 	ADVANCE_RING();
 
 	BEGIN_RING(4);
@@ -468,9 +459,6 @@
 	/* set flush flag */
 	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 
-	cmdbuf->buf += 8 * 4;
-	cmdbuf->bufsz -= 8 * 4;
-
 	return 0;
 }
 
@@ -480,28 +468,29 @@
 {
 	int count, i, k;
 #define MAX_ARRAY_PACKET  64
-	u32 payload[MAX_ARRAY_PACKET];
+	u32 *data;
 	u32 narrays;
 	RING_LOCALS;
 
-	count = (header >> 16) & 0x3fff;
+	count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 
 	if ((count + 1) > MAX_ARRAY_PACKET) {
 		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
 			  count);
 		return -EINVAL;
 	}
-	memset(payload, 0, MAX_ARRAY_PACKET * 4);
-	memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
-
 	/* carefully check packet contents */
 
-	narrays = payload[0];
+	/* We have already read the header so advance the buffer. */
+	drm_buffer_advance(cmdbuf->buffer, 4);
+
+	narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	k = 0;
 	i = 1;
 	while ((k < narrays) && (i < (count + 1))) {
 		i++;		/* skip attribute field */
-		if (!radeon_check_offset(dev_priv, payload[i])) {
+		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+		if (!radeon_check_offset(dev_priv, *data)) {
 			DRM_ERROR
 			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 			     k, i);
@@ -512,7 +501,8 @@
 		if (k == narrays)
 			break;
 		/* have one more to process, they come in pairs */
-		if (!radeon_check_offset(dev_priv, payload[i])) {
+		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+		if (!radeon_check_offset(dev_priv, *data)) {
 			DRM_ERROR
 			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 			     k, i);
@@ -533,30 +523,30 @@
 
 	BEGIN_RING(count + 2);
 	OUT_RING(header);
-	OUT_RING_TABLE(payload, count + 1);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
 	ADVANCE_RING();
 
-	cmdbuf->buf += (count + 2) * 4;
-	cmdbuf->bufsz -= (count + 2) * 4;
-
 	return 0;
 }
 
 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
 					     drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-	u32 *cmd = (u32 *) cmdbuf->buf;
+	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	int count, ret;
 	RING_LOCALS;
 
-	count=(cmd[0]>>16) & 0x3fff;
 
-	if (cmd[0] & 0x8000) {
+	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
+
+	if (*cmd & 0x8000) {
 		u32 offset;
-
-		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
+		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+		if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-			offset = cmd[2] << 10;
+
+			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+			offset = *cmd2 << 10;
 			ret = !radeon_check_offset(dev_priv, offset);
 			if (ret) {
 				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
@@ -564,9 +554,10 @@
 			}
 		}
 
-		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
-		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-			offset = cmd[3] << 10;
+		if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
+		    (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
+			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
+			offset = *cmd3 << 10;
 			ret = !radeon_check_offset(dev_priv, offset);
 			if (ret) {
 				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
@@ -577,28 +568,25 @@
 	}
 
 	BEGIN_RING(count+2);
-	OUT_RING(cmd[0]);
-	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 	ADVANCE_RING();
 
-	cmdbuf->buf += (count+2)*4;
-	cmdbuf->bufsz -= (count+2)*4;
-
 	return 0;
 }
 
 static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
 					    drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-	u32 *cmd;
+	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+	u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 	int count;
 	int expected_count;
 	RING_LOCALS;
 
-	cmd = (u32 *) cmdbuf->buf;
-	count = (cmd[0]>>16) & 0x3fff;
-	expected_count = cmd[1] >> 16;
-	if (!(cmd[1] & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
+	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
+
+	expected_count = *cmd1 >> 16;
+	if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
 		expected_count = (expected_count+1)/2;
 
 	if (count && count != expected_count) {
@@ -608,55 +596,53 @@
 	}
 
 	BEGIN_RING(count+2);
-	OUT_RING(cmd[0]);
-	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 	ADVANCE_RING();
 
-	cmdbuf->buf += (count+2)*4;
-	cmdbuf->bufsz -= (count+2)*4;
-
 	if (!count) {
-		drm_r300_cmd_header_t header;
+		drm_r300_cmd_header_t stack_header, *header;
+		u32 *cmd1, *cmd2, *cmd3;
 
-		if (cmdbuf->bufsz < 4*4 + sizeof(header)) {
+		if (drm_buffer_unprocessed(cmdbuf->buffer)
+				< 4*4 + sizeof(stack_header)) {
 			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
 			return -EINVAL;
 		}
 
-		header.u = *(unsigned int *)cmdbuf->buf;
+		header = drm_buffer_read_object(cmdbuf->buffer,
+				sizeof(stack_header), &stack_header);
 
-		cmdbuf->buf += sizeof(header);
-		cmdbuf->bufsz -= sizeof(header);
-		cmd = (u32 *) cmdbuf->buf;
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+		cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+		cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+		cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
 
-		if (header.header.cmd_type != R300_CMD_PACKET3 ||
-		    header.packet3.packet != R300_CMD_PACKET3_RAW ||
-		    cmd[0] != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
+		if (header->header.cmd_type != R300_CMD_PACKET3 ||
+		    header->packet3.packet != R300_CMD_PACKET3_RAW ||
+		    *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
 			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
 			return -EINVAL;
 		}
 
-		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
-			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
+		if ((*cmd1 & 0x8000ffff) != 0x80000810) {
+			DRM_ERROR("Invalid indx_buffer reg address %08X\n",
+					*cmd1);
 			return -EINVAL;
 		}
-		if (!radeon_check_offset(dev_priv, cmd[2])) {
-			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
+		if (!radeon_check_offset(dev_priv, *cmd2)) {
+			DRM_ERROR("Invalid indx_buffer offset is %08X\n",
+					*cmd2);
 			return -EINVAL;
 		}
-		if (cmd[3] != expected_count) {
+		if (*cmd3 != expected_count) {
 			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
-				cmd[3], expected_count);
+				*cmd3, expected_count);
 			return -EINVAL;
 		}
 
 		BEGIN_RING(4);
-		OUT_RING(cmd[0]);
-		OUT_RING_TABLE((int *)(cmdbuf->buf + 4), 3);
+		OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
 		ADVANCE_RING();
-
-		cmdbuf->buf += 4*4;
-		cmdbuf->bufsz -= 4*4;
 	}
 
 	return 0;
@@ -665,39 +651,39 @@
 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
 					    drm_radeon_kcmd_buffer_t *cmdbuf)
 {
-	u32 header;
+	u32 *header;
 	int count;
 	RING_LOCALS;
 
-	if (4 > cmdbuf->bufsz)
+	if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
 		return -EINVAL;
 
 	/* Fixme !! This simply emits a packet without much checking.
 	   We need to be smarter. */
 
 	/* obtain first word - actual packet3 header */
-	header = *(u32 *) cmdbuf->buf;
+	header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 
 	/* Is it packet 3 ? */
-	if ((header >> 30) != 0x3) {
-		DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
+	if ((*header >> 30) != 0x3) {
+		DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
 		return -EINVAL;
 	}
 
-	count = (header >> 16) & 0x3fff;
+	count = (*header >> 16) & 0x3fff;
 
 	/* Check again now that we know how much data to expect */
-	if ((count + 2) * 4 > cmdbuf->bufsz) {
+	if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
 		DRM_ERROR
 		    ("Expected packet3 of length %d but have only %d bytes left\n",
-		     (count + 2) * 4, cmdbuf->bufsz);
+		     (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
 		return -EINVAL;
 	}
 
 	/* Is it a packet type we know about ? */
-	switch (header & 0xff00) {
+	switch (*header & 0xff00) {
 	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
-		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
+		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);
 
 	case RADEON_CNTL_BITBLT_MULTI:
 		return r300_emit_bitblt_multi(dev_priv, cmdbuf);
@@ -723,18 +709,14 @@
 		/* these packets are safe */
 		break;
 	default:
-		DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
+		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
 		return -EINVAL;
 	}
 
 	BEGIN_RING(count + 2);
-	OUT_RING(header);
-	OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 	ADVANCE_RING();
 
-	cmdbuf->buf += (count + 2) * 4;
-	cmdbuf->bufsz -= (count + 2) * 4;
-
 	return 0;
 }
 
@@ -748,8 +730,7 @@
 {
 	int n;
 	int ret;
-	char *orig_buf = cmdbuf->buf;
-	int orig_bufsz = cmdbuf->bufsz;
+	int orig_iter = cmdbuf->buffer->iterator;
 
 	/* This is a do-while-loop so that we run the interior at least once,
 	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
@@ -761,8 +742,7 @@
 			if (ret)
 				return ret;
 
-			cmdbuf->buf = orig_buf;
-			cmdbuf->bufsz = orig_bufsz;
+			cmdbuf->buffer->iterator = orig_iter;
 		}
 
 		switch (header.packet3.packet) {
@@ -785,9 +765,9 @@
 			break;
 
 		default:
-			DRM_ERROR("bad packet3 type %i at %p\n",
+			DRM_ERROR("bad packet3 type %i at byte %d\n",
 				  header.packet3.packet,
-				  cmdbuf->buf - sizeof(header));
+				  cmdbuf->buffer->iterator - (int)sizeof(header));
 			return -EINVAL;
 		}
 
@@ -923,12 +903,13 @@
 			drm_r300_cmd_header_t header)
 {
 	u32 *ref_age_base;
-	u32 i, buf_idx, h_pending;
-	u64 ptr_addr;
+	u32 i, *buf_idx, h_pending;
+	u64 *ptr_addr;
+	u64 stack_ptr_addr;
 	RING_LOCALS;
 
-	if (cmdbuf->bufsz <
-	    (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
+	if (drm_buffer_unprocessed(cmdbuf->buffer) <
+	    (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
 		return -EINVAL;
 	}
 
@@ -938,36 +919,35 @@
 
 	dev_priv->scratch_ages[header.scratch.reg]++;
 
-	ptr_addr = get_unaligned((u64 *)cmdbuf->buf);
-	ref_age_base = (u32 *)(unsigned long)ptr_addr;
-
-	cmdbuf->buf += sizeof(u64);
-	cmdbuf->bufsz -= sizeof(u64);
+	ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
+			sizeof(stack_ptr_addr), &stack_ptr_addr);
+	ref_age_base = (u32 *)(unsigned long)*ptr_addr;
 
 	for (i=0; i < header.scratch.n_bufs; i++) {
-		buf_idx = *(u32 *)cmdbuf->buf;
-		buf_idx *= 2; /* 8 bytes per buf */
+		buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
+		*buf_idx *= 2; /* 8 bytes per buf */
 
-		if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
+		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
+				&dev_priv->scratch_ages[header.scratch.reg],
+				sizeof(u32)))
 			return -EINVAL;
-		}
 
-		if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
+		if (DRM_COPY_FROM_USER(&h_pending,
+				ref_age_base + *buf_idx + 1,
+				sizeof(u32)))
 			return -EINVAL;
-		}
 
-		if (h_pending == 0) {
+		if (h_pending == 0)
 			return -EINVAL;
-		}
 
 		h_pending--;
 
-		if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
+		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
+					&h_pending,
+					sizeof(u32)))
 			return -EINVAL;
-		}
 
-		cmdbuf->buf += sizeof(buf_idx);
-		cmdbuf->bufsz -= sizeof(buf_idx);
+		drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
 	}
 
 	BEGIN_RING(2);
@@ -1009,19 +989,16 @@
 	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
 	if (!sz)
 		return 0;
-	if (sz * stride * 4 > cmdbuf->bufsz)
+	if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 		return -EINVAL;
 
 	BEGIN_RING(3 + sz * stride);
 	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
 	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
-	OUT_RING_TABLE((int *)cmdbuf->buf, sz * stride);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
 
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * stride * 4;
-	cmdbuf->bufsz -= sz * stride * 4;
-
 	return 0;
 }
 
@@ -1053,19 +1030,18 @@
 			goto cleanup;
 	}
 
-	while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
+	while (drm_buffer_unprocessed(cmdbuf->buffer)
+			>= sizeof(drm_r300_cmd_header_t)) {
 		int idx;
-		drm_r300_cmd_header_t header;
+		drm_r300_cmd_header_t *header, stack_header;
 
-		header.u = *(unsigned int *)cmdbuf->buf;
+		header = drm_buffer_read_object(cmdbuf->buffer,
+				sizeof(stack_header), &stack_header);
 
-		cmdbuf->buf += sizeof(header);
-		cmdbuf->bufsz -= sizeof(header);
-
-		switch (header.header.cmd_type) {
+		switch (header->header.cmd_type) {
 		case R300_CMD_PACKET0:
 			DRM_DEBUG("R300_CMD_PACKET0\n");
-			ret = r300_emit_packet0(dev_priv, cmdbuf, header);
+			ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
 			if (ret) {
 				DRM_ERROR("r300_emit_packet0 failed\n");
 				goto cleanup;
@@ -1074,7 +1050,7 @@
 
 		case R300_CMD_VPU:
 			DRM_DEBUG("R300_CMD_VPU\n");
-			ret = r300_emit_vpu(dev_priv, cmdbuf, header);
+			ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
 			if (ret) {
 				DRM_ERROR("r300_emit_vpu failed\n");
 				goto cleanup;
@@ -1083,7 +1059,7 @@
 
 		case R300_CMD_PACKET3:
 			DRM_DEBUG("R300_CMD_PACKET3\n");
-			ret = r300_emit_packet3(dev_priv, cmdbuf, header);
+			ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
 			if (ret) {
 				DRM_ERROR("r300_emit_packet3 failed\n");
 				goto cleanup;
@@ -1117,8 +1093,8 @@
 				int i;
 				RING_LOCALS;
 
-				BEGIN_RING(header.delay.count);
-				for (i = 0; i < header.delay.count; i++)
+				BEGIN_RING(header->delay.count);
+				for (i = 0; i < header->delay.count; i++)
 					OUT_RING(RADEON_CP_PACKET2);
 				ADVANCE_RING();
 			}
@@ -1126,7 +1102,7 @@
 
 		case R300_CMD_DMA_DISCARD:
 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
-			idx = header.dma.buf_idx;
+			idx = header->dma.buf_idx;
 			if (idx < 0 || idx >= dma->buf_count) {
 				DRM_ERROR("buffer index %d (of %d max)\n",
 					  idx, dma->buf_count - 1);
@@ -1149,12 +1125,12 @@
 
 		case R300_CMD_WAIT:
 			DRM_DEBUG("R300_CMD_WAIT\n");
-			r300_cmd_wait(dev_priv, header);
+			r300_cmd_wait(dev_priv, *header);
 			break;
 
 		case R300_CMD_SCRATCH:
 			DRM_DEBUG("R300_CMD_SCRATCH\n");
-			ret = r300_scratch(dev_priv, cmdbuf, header);
+			ret = r300_scratch(dev_priv, cmdbuf, *header);
 			if (ret) {
 				DRM_ERROR("r300_scratch failed\n");
 				goto cleanup;
@@ -1168,16 +1144,16 @@
 				goto cleanup;
 			}
 			DRM_DEBUG("R300_CMD_R500FP\n");
-			ret = r300_emit_r500fp(dev_priv, cmdbuf, header);
+			ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
 			if (ret) {
 				DRM_ERROR("r300_emit_r500fp failed\n");
 				goto cleanup;
 			}
 			break;
 		default:
-			DRM_ERROR("bad cmd_type %i at %p\n",
-				  header.header.cmd_type,
-				  cmdbuf->buf - sizeof(header));
+			DRM_ERROR("bad cmd_type %i at byte %d\n",
+				  header->header.cmd_type,
+				  cmdbuf->buffer->iterator - (int)sizeof(*header));
 			ret = -EINVAL;
 			goto cleanup;
 		}
diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h
index 1735a2b..1a0d536 100644
--- a/drivers/gpu/drm/radeon/r300_reg.h
+++ b/drivers/gpu/drm/radeon/r300_reg.h
@@ -952,6 +952,7 @@
 #       define R300_TXO_ENDIAN_HALFDW_SWAP       (3 << 0)
 #       define R300_TXO_MACRO_TILE               (1 << 2)
 #       define R300_TXO_MICRO_TILE               (1 << 3)
+#       define R300_TXO_MICRO_TILE_SQUARE        (2 << 3)
 #       define R300_TXO_OFFSET_MASK              0xffffffe0
 #       define R300_TXO_OFFSET_SHIFT             5
 	/* END: Guess from R200 */
@@ -1360,6 +1361,7 @@
 #       define R300_COLORPITCH_MASK              0x00001FF8 /* GUESS */
 #       define R300_COLOR_TILE_ENABLE            (1 << 16) /* GUESS */
 #       define R300_COLOR_MICROTILE_ENABLE       (1 << 17) /* GUESS */
+#       define R300_COLOR_MICROTILE_SQUARE_ENABLE (2 << 17)
 #       define R300_COLOR_ENDIAN_NO_SWAP         (0 << 18) /* GUESS */
 #       define R300_COLOR_ENDIAN_WORD_SWAP       (1 << 18) /* GUESS */
 #       define R300_COLOR_ENDIAN_DWORD_SWAP      (2 << 18) /* GUESS */
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index d937324..c7593b8 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -40,28 +40,6 @@
 	rdev->config.r300.reg_safe_bm_size = ARRAY_SIZE(r420_reg_safe_bm);
 }
 
-int r420_mc_init(struct radeon_device *rdev)
-{
-	int r;
-
-	/* Setup GPU memory space */
-	rdev->mc.vram_location = 0xFFFFFFFFUL;
-	rdev->mc.gtt_location = 0xFFFFFFFFUL;
-	if (rdev->flags & RADEON_IS_AGP) {
-		r = radeon_agp_init(rdev);
-		if (r) {
-			radeon_agp_disable(rdev);
-		} else {
-			rdev->mc.gtt_location = rdev->mc.agp_base;
-		}
-	}
-	r = radeon_mc_setup(rdev);
-	if (r) {
-		return r;
-	}
-	return 0;
-}
-
 void r420_pipes_init(struct radeon_device *rdev)
 {
 	unsigned tmp;
@@ -69,7 +47,8 @@
 	unsigned num_pipes;
 
 	/* GA_ENHANCE workaround TCL deadlock issue */
-	WREG32(0x4274, (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3));
+	WREG32(R300_GA_ENHANCE, R300_GA_DEADLOCK_CNTL | R300_GA_FASTSYNC_CNTL |
+	       (1 << 2) | (1 << 3));
 	/* add idle wait as per freedesktop.org bug 24041 */
 	if (r100_gui_wait_for_idle(rdev)) {
 		printk(KERN_WARNING "Failed to wait GUI idle while "
@@ -97,17 +76,17 @@
 		tmp = (7 << 1);
 		break;
 	}
-	WREG32(0x42C8, (1 << num_pipes) - 1);
+	WREG32(R500_SU_REG_DEST, (1 << num_pipes) - 1);
 	/* Sub pixel 1/12 so we can have 4K rendering according to doc */
-	tmp |= (1 << 4) | (1 << 0);
-	WREG32(0x4018, tmp);
+	tmp |= R300_TILE_SIZE_16 | R300_ENABLE_TILING;
+	WREG32(R300_GB_TILE_CONFIG, tmp);
 	if (r100_gui_wait_for_idle(rdev)) {
 		printk(KERN_WARNING "Failed to wait GUI idle while "
 		       "programming pipes. Bad things might happen.\n");
 	}
 
-	tmp = RREG32(0x170C);
-	WREG32(0x170C, tmp | (1 << 31));
+	tmp = RREG32(R300_DST_PIPE_CONFIG);
+	WREG32(R300_DST_PIPE_CONFIG, tmp | R300_PIPE_AUTO_CONFIG);
 
 	WREG32(R300_RB2D_DSTCACHE_MODE,
 	       RREG32(R300_RB2D_DSTCACHE_MODE) |
@@ -348,13 +327,15 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	r300_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = r420_mc_init(rdev);
-	if (r) {
-		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			radeon_agp_disable(rdev);
+		}
 	}
+	/* initialize memory controller */
+	r300_mc_init(rdev);
 	r420_debugfs(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
diff --git a/drivers/gpu/drm/radeon/r500_reg.h b/drivers/gpu/drm/radeon/r500_reg.h
index 74ad89b..0cf2ad2 100644
--- a/drivers/gpu/drm/radeon/r500_reg.h
+++ b/drivers/gpu/drm/radeon/r500_reg.h
@@ -717,54 +717,62 @@
 #define AVIVO_DVOA_BIT_DEPTH_CONTROL			0x7988
 
 #define AVIVO_DC_GPIO_HPD_A                 0x7e94
-
-#define AVIVO_GPIO_0                        0x7e30
-#define AVIVO_GPIO_1                        0x7e40
-#define AVIVO_GPIO_2                        0x7e50
-#define AVIVO_GPIO_3                        0x7e60
-
 #define AVIVO_DC_GPIO_HPD_Y                 0x7e9c
 
-#define AVIVO_I2C_STATUS					0x7d30
-#	define AVIVO_I2C_STATUS_DONE				(1 << 0)
-#	define AVIVO_I2C_STATUS_NACK				(1 << 1)
-#	define AVIVO_I2C_STATUS_HALT				(1 << 2)
-#	define AVIVO_I2C_STATUS_GO				(1 << 3)
-#	define AVIVO_I2C_STATUS_MASK				0x7
-/* If radeon_mm_i2c is to be believed, this is HALT, NACK, and maybe
- * DONE? */
-#	define AVIVO_I2C_STATUS_CMD_RESET			0x7
-#	define AVIVO_I2C_STATUS_CMD_WAIT			(1 << 3)
-#define AVIVO_I2C_STOP						0x7d34
-#define AVIVO_I2C_START_CNTL				0x7d38
-#	define AVIVO_I2C_START						(1 << 8)
-#	define AVIVO_I2C_CONNECTOR0					(0 << 16)
-#	define AVIVO_I2C_CONNECTOR1					(1 << 16)
-#define R520_I2C_START (1<<0)
-#define R520_I2C_STOP (1<<1)
-#define R520_I2C_RX (1<<2)
-#define R520_I2C_EN (1<<8)
-#define R520_I2C_DDC1 (0<<16)
-#define R520_I2C_DDC2 (1<<16)
-#define R520_I2C_DDC3 (2<<16)
-#define R520_I2C_DDC_MASK (3<<16)
-#define AVIVO_I2C_CONTROL2					0x7d3c
-#	define AVIVO_I2C_7D3C_SIZE_SHIFT			8
-#	define AVIVO_I2C_7D3C_SIZE_MASK				(0xf << 8)
-#define AVIVO_I2C_CONTROL3						0x7d40
-/* Reading is done 4 bytes at a time: read the bottom 8 bits from
- * 7d44, four times in a row.
- * Writing is a little more complex.  First write DATA with
- * 0xnnnnnnzz, then 0xnnnnnnyy, where nnnnnn is some non-deterministic
- * magic number, zz is, I think, the slave address, and yy is the byte
- * you want to write. */
-#define AVIVO_I2C_DATA						0x7d44
-#define R520_I2C_ADDR_COUNT_MASK (0x7)
-#define R520_I2C_DATA_COUNT_SHIFT (8)
-#define R520_I2C_DATA_COUNT_MASK (0xF00)
-#define AVIVO_I2C_CNTL						0x7d50
-#	define AVIVO_I2C_EN							(1 << 0)
-#	define AVIVO_I2C_RESET						(1 << 8)
+#define AVIVO_DC_I2C_STATUS1				0x7d30
+#	define AVIVO_DC_I2C_DONE			(1 << 0)
+#	define AVIVO_DC_I2C_NACK			(1 << 1)
+#	define AVIVO_DC_I2C_HALT			(1 << 2)
+#	define AVIVO_DC_I2C_GO			        (1 << 3)
+#define AVIVO_DC_I2C_RESET 				0x7d34
+#	define AVIVO_DC_I2C_SOFT_RESET			(1 << 0)
+#	define AVIVO_DC_I2C_ABORT			(1 << 8)
+#define AVIVO_DC_I2C_CONTROL1 				0x7d38
+#	define AVIVO_DC_I2C_START			(1 << 0)
+#	define AVIVO_DC_I2C_STOP			(1 << 1)
+#	define AVIVO_DC_I2C_RECEIVE			(1 << 2)
+#	define AVIVO_DC_I2C_EN			        (1 << 8)
+#	define AVIVO_DC_I2C_PIN_SELECT(x)		((x) << 16)
+#	define AVIVO_SEL_DDC1			        0
+#	define AVIVO_SEL_DDC2			        1
+#	define AVIVO_SEL_DDC3			        2
+#define AVIVO_DC_I2C_CONTROL2 				0x7d3c
+#	define AVIVO_DC_I2C_ADDR_COUNT(x)		((x) << 0)
+#	define AVIVO_DC_I2C_DATA_COUNT(x)		((x) << 8)
+#define AVIVO_DC_I2C_CONTROL3 				0x7d40
+#	define AVIVO_DC_I2C_DATA_DRIVE_EN		(1 << 0)
+#	define AVIVO_DC_I2C_DATA_DRIVE_SEL		(1 << 1)
+#	define AVIVO_DC_I2C_CLK_DRIVE_EN		(1 << 7)
+#	define AVIVO_DC_I2C_RD_INTRA_BYTE_DELAY(x)      ((x) << 8)
+#	define AVIVO_DC_I2C_WR_INTRA_BYTE_DELAY(x)	((x) << 16)
+#	define AVIVO_DC_I2C_TIME_LIMIT(x)		((x) << 24)
+#define AVIVO_DC_I2C_DATA 				0x7d44
+#define AVIVO_DC_I2C_INTERRUPT_CONTROL 			0x7d48
+#	define AVIVO_DC_I2C_INTERRUPT_STATUS		(1 << 0)
+#	define AVIVO_DC_I2C_INTERRUPT_AK		(1 << 8)
+#	define AVIVO_DC_I2C_INTERRUPT_ENABLE		(1 << 16)
+#define AVIVO_DC_I2C_ARBITRATION 			0x7d50
+#	define AVIVO_DC_I2C_SW_WANTS_TO_USE_I2C		(1 << 0)
+#	define AVIVO_DC_I2C_SW_CAN_USE_I2C		(1 << 1)
+#	define AVIVO_DC_I2C_SW_DONE_USING_I2C		(1 << 8)
+#	define AVIVO_DC_I2C_HW_NEEDS_I2C		(1 << 9)
+#	define AVIVO_DC_I2C_ABORT_HDCP_I2C		(1 << 16)
+#	define AVIVO_DC_I2C_HW_USING_I2C		(1 << 17)
+
+#define AVIVO_DC_GPIO_DDC1_MASK 		        0x7e40
+#define AVIVO_DC_GPIO_DDC1_A 		                0x7e44
+#define AVIVO_DC_GPIO_DDC1_EN 		                0x7e48
+#define AVIVO_DC_GPIO_DDC1_Y 		                0x7e4c
+
+#define AVIVO_DC_GPIO_DDC2_MASK 		        0x7e50
+#define AVIVO_DC_GPIO_DDC2_A 		                0x7e54
+#define AVIVO_DC_GPIO_DDC2_EN 		                0x7e58
+#define AVIVO_DC_GPIO_DDC2_Y 		                0x7e5c
+
+#define AVIVO_DC_GPIO_DDC3_MASK 		        0x7e60
+#define AVIVO_DC_GPIO_DDC3_A 		                0x7e64
+#define AVIVO_DC_GPIO_DDC3_EN 		                0x7e68
+#define AVIVO_DC_GPIO_DDC3_Y 		                0x7e6c
 
 #define AVIVO_DISP_INTERRUPT_STATUS                             0x7edc
 #       define AVIVO_D1_VBLANK_INTERRUPT                        (1 << 4)
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index ddf5731..2b8a5dd 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -119,13 +119,15 @@
 		rdev->mc.vram_width *= 2;
 }
 
-void r520_vram_info(struct radeon_device *rdev)
+void r520_mc_init(struct radeon_device *rdev)
 {
 	fixed20_12 a;
 
 	r520_vram_get_type(rdev);
-
 	r100_vram_init_sizes(rdev);
+	radeon_vram_location(rdev, &rdev->mc, 0);
+	if (!(rdev->flags & RADEON_IS_AGP))
+		radeon_gtt_location(rdev, &rdev->mc);
 	/* FIXME: we should enforce default clock in case GPU is not in
 	 * default setup
 	 */
@@ -267,12 +269,15 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	r520_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = r420_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			radeon_agp_disable(rdev);
+		}
+	}
+	/* initialize memory controller */
+	r520_mc_init(rdev);
 	rv515_debugfs(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2ffcf5a..c522901 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -353,23 +353,14 @@
 /*
  * R600 PCIE GART
  */
-int r600_gart_clear_page(struct radeon_device *rdev, int i)
-{
-	void __iomem *ptr = (void *)rdev->gart.table.vram.ptr;
-	u64 pte;
-
-	if (i < 0 || i > rdev->gart.num_gpu_pages)
-		return -EINVAL;
-	pte = 0;
-	writeq(pte, ((void __iomem *)ptr) + (i * 8));
-	return 0;
-}
-
 void r600_pcie_gart_tlb_flush(struct radeon_device *rdev)
 {
 	unsigned i;
 	u32 tmp;
 
+	/* flush hdp cache so updates hit vram */
+	WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+
 	WREG32(VM_CONTEXT0_INVALIDATION_LOW_ADDR, rdev->mc.gtt_start >> 12);
 	WREG32(VM_CONTEXT0_INVALIDATION_HIGH_ADDR, (rdev->mc.gtt_end - 1) >> 12);
 	WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
@@ -416,6 +407,7 @@
 	r = radeon_gart_table_vram_pin(rdev);
 	if (r)
 		return r;
+	radeon_gart_restore(rdev);
 
 	/* Setup L2 cache */
 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
@@ -619,6 +611,68 @@
 	rv515_vga_render_disable(rdev);
 }
 
+/**
+ * r600_vram_gtt_location - try to find VRAM & GTT location
+ * @rdev: radeon device structure holding all necessary informations
+ * @mc: memory controller structure holding memory informations
+ *
+ * Function will place try to place VRAM at same place as in CPU (PCI)
+ * address space as some GPU seems to have issue when we reprogram at
+ * different address space.
+ *
+ * If there is not enough space to fit the unvisible VRAM after the
+ * aperture then we limit the VRAM size to the aperture.
+ *
+ * If we are using AGP then place VRAM adjacent to AGP aperture are we need
+ * them to be in one from GPU point of view so that we can program GPU to
+ * catch access outside them (weird GPU policy see ??).
+ *
+ * This function will never fails, worst case are limiting VRAM or GTT.
+ *
+ * Note: GTT start, end, size should be initialized before calling this
+ * function on AGP platform.
+ */
+void r600_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
+{
+	u64 size_bf, size_af;
+
+	if (mc->mc_vram_size > 0xE0000000) {
+		/* leave room for at least 512M GTT */
+		dev_warn(rdev->dev, "limiting VRAM\n");
+		mc->real_vram_size = 0xE0000000;
+		mc->mc_vram_size = 0xE0000000;
+	}
+	if (rdev->flags & RADEON_IS_AGP) {
+		size_bf = mc->gtt_start;
+		size_af = 0xFFFFFFFF - mc->gtt_end + 1;
+		if (size_bf > size_af) {
+			if (mc->mc_vram_size > size_bf) {
+				dev_warn(rdev->dev, "limiting VRAM\n");
+				mc->real_vram_size = size_bf;
+				mc->mc_vram_size = size_bf;
+			}
+			mc->vram_start = mc->gtt_start - mc->mc_vram_size;
+		} else {
+			if (mc->mc_vram_size > size_af) {
+				dev_warn(rdev->dev, "limiting VRAM\n");
+				mc->real_vram_size = size_af;
+				mc->mc_vram_size = size_af;
+			}
+			mc->vram_start = mc->gtt_end;
+		}
+		mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+		dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
+				mc->mc_vram_size >> 20, mc->vram_start,
+				mc->vram_end, mc->real_vram_size >> 20);
+	} else {
+		u64 base = 0;
+		if (rdev->flags & RADEON_IS_IGP)
+			base = (RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
+		radeon_vram_location(rdev, &rdev->mc, base);
+		radeon_gtt_location(rdev, mc);
+	}
+}
+
 int r600_mc_init(struct radeon_device *rdev)
 {
 	fixed20_12 a;
@@ -658,75 +712,21 @@
 	/* Setup GPU memory space */
 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
-
-	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	/* FIXME remove this once we support unmappable VRAM */
+	if (rdev->mc.mc_vram_size > rdev->mc.aper_size) {
 		rdev->mc.mc_vram_size = rdev->mc.aper_size;
-
-	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
 		rdev->mc.real_vram_size = rdev->mc.aper_size;
-
-	if (rdev->flags & RADEON_IS_AGP) {
-		/* gtt_size is setup by radeon_agp_init */
-		rdev->mc.gtt_location = rdev->mc.agp_base;
-		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
-		/* Try to put vram before or after AGP because we
-		 * we want SYSTEM_APERTURE to cover both VRAM and
-		 * AGP so that GPU can catch out of VRAM/AGP access
-		 */
-		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
-			/* Enough place before */
-			rdev->mc.vram_location = rdev->mc.gtt_location -
-							rdev->mc.mc_vram_size;
-		} else if (tmp > rdev->mc.mc_vram_size) {
-			/* Enough place after */
-			rdev->mc.vram_location = rdev->mc.gtt_location +
-							rdev->mc.gtt_size;
-		} else {
-			/* Try to setup VRAM then AGP might not
-			 * not work on some card
-			 */
-			rdev->mc.vram_location = 0x00000000UL;
-			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
-		}
-	} else {
-		rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
-		rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) &
-							0xFFFF) << 24;
-		tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
-		if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
-			/* Enough place after vram */
-			rdev->mc.gtt_location = tmp;
-		} else if (rdev->mc.vram_location >= rdev->mc.gtt_size) {
-			/* Enough place before vram */
-			rdev->mc.gtt_location = 0;
-		} else {
-			/* Not enough place after or before shrink
-			 * gart size
-			 */
-			if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) {
-				rdev->mc.gtt_location = 0;
-				rdev->mc.gtt_size = rdev->mc.vram_location;
-			} else {
-				rdev->mc.gtt_location = tmp;
-				rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp;
-			}
-		}
-		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
 	}
-	rdev->mc.vram_start = rdev->mc.vram_location;
-	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	rdev->mc.gtt_start = rdev->mc.gtt_location;
-	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	r600_vram_gtt_location(rdev, &rdev->mc);
 	/* FIXME: we should enforce default clock in case GPU is not in
 	 * default setup
 	 */
 	a.full = rfixed_const(100);
 	rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk);
 	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
-
 	if (rdev->flags & RADEON_IS_IGP)
 		rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
-
 	return 0;
 }
 
@@ -981,6 +981,9 @@
 {
 	u32 tiling_config;
 	u32 ramcfg;
+	u32 backend_map;
+	u32 cc_rb_backend_disable;
+	u32 cc_gc_shader_pipe_config;
 	u32 tmp;
 	int i, j;
 	u32 sq_config;
@@ -1090,8 +1093,11 @@
 	default:
 		break;
 	}
+	rdev->config.r600.tiling_npipes = rdev->config.r600.max_tile_pipes;
+	rdev->config.r600.tiling_nbanks = 4 << ((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
 	tiling_config |= BANK_TILING((ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
 	tiling_config |= GROUP_SIZE(0);
+	rdev->config.r600.tiling_group_size = 256;
 	tmp = (ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
 	if (tmp > 3) {
 		tiling_config |= ROW_TILING(3);
@@ -1101,24 +1107,33 @@
 		tiling_config |= SAMPLE_SPLIT(tmp);
 	}
 	tiling_config |= BANK_SWAPS(1);
-	tmp = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
-						rdev->config.r600.max_backends,
-						(0xff << rdev->config.r600.max_backends) & 0xff);
-	tiling_config |= BACKEND_MAP(tmp);
+
+	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
+	cc_rb_backend_disable |=
+		BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
+
+	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
+	cc_gc_shader_pipe_config |=
+		INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
+	cc_gc_shader_pipe_config |=
+		INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
+
+	backend_map = r600_get_tile_pipe_to_backend_map(rdev->config.r600.max_tile_pipes,
+							(R6XX_MAX_BACKENDS -
+							 r600_count_pipe_bits((cc_rb_backend_disable &
+									       R6XX_MAX_BACKENDS_MASK) >> 16)),
+							(cc_rb_backend_disable >> 16));
+
+	tiling_config |= BACKEND_MAP(backend_map);
 	WREG32(GB_TILING_CONFIG, tiling_config);
 	WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
 	WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
 
-	tmp = BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << rdev->config.r600.max_backends) & R6XX_MAX_BACKENDS_MASK);
-	WREG32(CC_RB_BACKEND_DISABLE, tmp);
-
 	/* Setup pipes */
-	tmp = INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << rdev->config.r600.max_pipes) & R6XX_MAX_PIPES_MASK);
-	tmp |= INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << rdev->config.r600.max_simds) & R6XX_MAX_SIMDS_MASK);
-	WREG32(CC_GC_SHADER_PIPE_CONFIG, tmp);
-	WREG32(GC_USER_SHADER_PIPE_CONFIG, tmp);
+	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
+	WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
 
-	tmp = R6XX_MAX_BACKENDS - r600_count_pipe_bits(tmp & INACTIVE_QD_PIPES_MASK);
+	tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
 	WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
 	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((tmp * 4) - 2) & VTX_REUSE_DEPTH_MASK);
 
@@ -1783,12 +1798,17 @@
 			  struct radeon_fence *fence)
 {
 	/* Also consider EVENT_WRITE_EOP.  it handles the interrupts + timestamps + events */
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
+	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
+	/* wait for 3D idle clean */
+	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 	radeon_ring_write(rdev, fence->seq);
-	radeon_ring_write(rdev, PACKET0(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
-	radeon_ring_write(rdev, 1);
 	/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
 	radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
 	radeon_ring_write(rdev, RB_INT_STAT);
@@ -2745,6 +2765,7 @@
 			case 0: /* D1 vblank */
 				if (disp_int & LB_D1_VBLANK_INTERRUPT) {
 					drm_handle_vblank(rdev->ddev, 0);
+					wake_up(&rdev->irq.vblank_queue);
 					disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D1 vblank\n");
 				}
@@ -2765,6 +2786,7 @@
 			case 0: /* D2 vblank */
 				if (disp_int & LB_D2_VBLANK_INTERRUPT) {
 					drm_handle_vblank(rdev->ddev, 1);
+					wake_up(&rdev->irq.vblank_queue);
 					disp_int &= ~LB_D2_VBLANK_INTERRUPT;
 					DRM_DEBUG("IH: D2 vblank\n");
 				}
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index 0dcb690..db92801 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -35,7 +35,7 @@
  */
 static int r600_audio_chipset_supported(struct radeon_device *rdev)
 {
-	return (rdev->family >= CHIP_R600 && rdev->family < CHIP_RV710)
+	return rdev->family >= CHIP_R600
 		|| rdev->family == CHIP_RS600
 		|| rdev->family == CHIP_RS690
 		|| rdev->family == CHIP_RS740;
@@ -147,15 +147,23 @@
 }
 
 /*
+ * turn on/off audio engine
+ */
+static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable)
+{
+	DRM_INFO("%s audio support", enable ? "Enabling" : "Disabling");
+	WREG32_P(R600_AUDIO_ENABLE, enable ? 0x81000000 : 0x0, ~0x81000000);
+}
+
+/*
  * initialize the audio vars and register the update timer
  */
 int r600_audio_init(struct radeon_device *rdev)
 {
-	if (!r600_audio_chipset_supported(rdev))
+	if (!radeon_audio || !r600_audio_chipset_supported(rdev))
 		return 0;
 
-	DRM_INFO("%s audio support", radeon_audio ? "Enabling" : "Disabling");
-	WREG32_P(R600_AUDIO_ENABLE, radeon_audio ? 0x81000000 : 0x0, ~0x81000000);
+	r600_audio_engine_enable(rdev, true);
 
 	rdev->audio_channels = -1;
 	rdev->audio_rate = -1;
@@ -258,9 +266,10 @@
  */
 void r600_audio_fini(struct radeon_device *rdev)
 {
-	if (!r600_audio_chipset_supported(rdev))
+	if (!radeon_audio || !r600_audio_chipset_supported(rdev))
 		return;
 
 	del_timer(&rdev->audio_timer);
-	WREG32_P(R600_AUDIO_ENABLE, 0x0, ~0x81000000);
+
+	r600_audio_engine_enable(rdev, false);
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 5ea4323..f4fb88e 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -49,7 +49,7 @@
 	RING_LOCALS;
 	DRM_DEBUG("\n");
 
-	h = (h + 7) & ~7;
+	h = ALIGN(h, 8);
 	if (h < 8)
 		h = 8;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 446b765..f6c6c77 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -25,7 +25,7 @@
 	u32 cb_color_info;
 	int pitch, slice;
 
-	h = (h + 7) & ~7;
+	h = ALIGN(h, 8);
 	if (h < 8)
 		h = 8;
 
@@ -396,15 +396,13 @@
 				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
 
 	/* emit an IB pointing at default state */
-	dwords = (rdev->r600_blit.state_len + 0xf) & ~0xf;
+	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
 	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
 	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
 	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
 	radeon_ring_write(rdev, dwords);
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
 	/* SQ config */
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
 	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
@@ -578,9 +576,9 @@
 	ring_size = num_loops * dwords_per_loop;
 	/* set default  + shaders */
 	ring_size += 40; /* shaders + def state */
-	ring_size += 7; /* fence emit for VB IB */
+	ring_size += 10; /* fence emit for VB IB */
 	ring_size += 5; /* done copy */
-	ring_size += 7; /* fence emit for done copy */
+	ring_size += 10; /* fence emit for done copy */
 	r = radeon_ring_lock(rdev, ring_size);
 	if (r)
 		return r;
@@ -594,13 +592,6 @@
 {
 	int r;
 
-	radeon_ring_write(rdev, PACKET3(PACKET3_EVENT_WRITE, 0));
-	radeon_ring_write(rdev, CACHE_FLUSH_AND_INV_EVENT);
-	/* wait for 3D idle clean */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(rdev, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
-
 	if (rdev->r600_blit.vb_ib)
 		r600_vb_ib_put(rdev);
 
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c
index d745e81..a112c59 100644
--- a/drivers/gpu/drm/radeon/r600_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.c
@@ -9,11 +9,6 @@
 	0xc0012800,
 	0x80000000,
 	0x80000000,
-	0xc0004600,
-	0x00000016,
-	0xc0016800,
-	0x00000010,
-	0x00028000,
 	0xc0016800,
 	0x00000010,
 	0x00008000,
@@ -531,11 +526,6 @@
 	0xc0012800,
 	0x80000000,
 	0x80000000,
-	0xc0004600,
-	0x00000016,
-	0xc0016800,
-	0x00000010,
-	0x00028000,
 	0xc0016800,
 	0x00000010,
 	0x00008000,
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 75bcf35..40416c0 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -734,8 +734,8 @@
 	u32 hdp_host_path_cntl;
 	u32 backend_map;
 	u32 gb_tiling_config = 0;
-	u32 cc_rb_backend_disable = 0;
-	u32 cc_gc_shader_pipe_config = 0;
+	u32 cc_rb_backend_disable;
+	u32 cc_gc_shader_pipe_config;
 	u32 ramcfg;
 
 	/* setup chip specs */
@@ -857,29 +857,44 @@
 
 	gb_tiling_config |= R600_BANK_SWAPS(1);
 
-	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
-							dev_priv->r600_max_backends,
-							(0xff << dev_priv->r600_max_backends) & 0xff);
-	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
+	cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
+	cc_rb_backend_disable |=
+		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
 
-	cc_gc_shader_pipe_config =
+	cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
+	cc_gc_shader_pipe_config |=
 		R600_INACTIVE_QD_PIPES((R6XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R6XX_MAX_PIPES_MASK);
 	cc_gc_shader_pipe_config |=
 		R600_INACTIVE_SIMDS((R6XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R6XX_MAX_SIMDS_MASK);
 
-	cc_rb_backend_disable =
-		R600_BACKEND_DISABLE((R6XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R6XX_MAX_BACKENDS_MASK);
+	backend_map = r600_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
+							(R6XX_MAX_BACKENDS -
+							 r600_count_pipe_bits((cc_rb_backend_disable &
+									       R6XX_MAX_BACKENDS_MASK) >> 16)),
+							(cc_rb_backend_disable >> 16));
+	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
 
 	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
 	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
 	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
+	if (gb_tiling_config & 0xc0) {
+		dev_priv->r600_group_size = 512;
+	} else {
+		dev_priv->r600_group_size = 256;
+	}
+	dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
+	if (gb_tiling_config & 0x30) {
+		dev_priv->r600_nbanks = 8;
+	} else {
+		dev_priv->r600_nbanks = 4;
+	}
 
 	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
 	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
 
 	num_qd_pipes =
-		R6XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
+		R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
 	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
 	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
 
@@ -1151,7 +1166,8 @@
 
 }
 
-static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
+static u32 r700_get_tile_pipe_to_backend_map(drm_radeon_private_t *dev_priv,
+					     u32 num_tile_pipes,
 					     u32 num_backends,
 					     u32 backend_disable_mask)
 {
@@ -1162,6 +1178,7 @@
 	u32 swizzle_pipe[R7XX_MAX_PIPES];
 	u32 cur_backend;
 	u32 i;
+	bool force_no_swizzle;
 
 	if (num_tile_pipes > R7XX_MAX_PIPES)
 		num_tile_pipes = R7XX_MAX_PIPES;
@@ -1191,6 +1208,18 @@
 	if (enabled_backends_count != num_backends)
 		num_backends = enabled_backends_count;
 
+	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
+	case CHIP_RV770:
+	case CHIP_RV730:
+		force_no_swizzle = false;
+		break;
+	case CHIP_RV710:
+	case CHIP_RV740:
+	default:
+		force_no_swizzle = true;
+		break;
+	}
+
 	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
 	switch (num_tile_pipes) {
 	case 1:
@@ -1201,49 +1230,100 @@
 		swizzle_pipe[1] = 1;
 		break;
 	case 3:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 1;
+		}
 		break;
 	case 4:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 3;
-		swizzle_pipe[3] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 3;
+			swizzle_pipe[3] = 1;
+		}
 		break;
 	case 5:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 1;
-		swizzle_pipe[4] = 3;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 1;
+			swizzle_pipe[4] = 3;
+		}
 		break;
 	case 6:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 5;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 5;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+		}
 		break;
 	case 7:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 6;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
-		swizzle_pipe[6] = 5;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+			swizzle_pipe[6] = 6;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 6;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+			swizzle_pipe[6] = 5;
+		}
 		break;
 	case 8:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 6;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
-		swizzle_pipe[6] = 7;
-		swizzle_pipe[7] = 5;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+			swizzle_pipe[6] = 6;
+			swizzle_pipe[7] = 7;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 6;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+			swizzle_pipe[6] = 7;
+			swizzle_pipe[7] = 5;
+		}
 		break;
 	}
 
@@ -1264,8 +1344,10 @@
 			  drm_radeon_private_t *dev_priv)
 {
 	int i, j, num_qd_pipes;
+	u32 ta_aux_cntl;
 	u32 sx_debug_1;
 	u32 smx_dc_ctl0;
+	u32 db_debug3;
 	u32 num_gs_verts_per_thread;
 	u32 vgt_gs_per_es;
 	u32 gs_prim_buffer_depth = 0;
@@ -1276,8 +1358,8 @@
 	u32 sq_dyn_gpr_size_simd_ab_0;
 	u32 backend_map;
 	u32 gb_tiling_config = 0;
-	u32 cc_rb_backend_disable = 0;
-	u32 cc_gc_shader_pipe_config = 0;
+	u32 cc_rb_backend_disable;
+	u32 cc_gc_shader_pipe_config;
 	u32 mc_arb_ramcfg;
 	u32 db_debug4;
 
@@ -1428,38 +1510,51 @@
 
 	gb_tiling_config |= R600_BANK_SWAPS(1);
 
-	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
-		backend_map = 0x28;
-	else
-		backend_map = r700_get_tile_pipe_to_backend_map(dev_priv->r600_max_tile_pipes,
-								dev_priv->r600_max_backends,
-								(0xff << dev_priv->r600_max_backends) & 0xff);
-	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
+	cc_rb_backend_disable = RADEON_READ(R600_CC_RB_BACKEND_DISABLE) & 0x00ff0000;
+	cc_rb_backend_disable |=
+		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
 
-	cc_gc_shader_pipe_config =
+	cc_gc_shader_pipe_config = RADEON_READ(R600_CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
+	cc_gc_shader_pipe_config |=
 		R600_INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << dev_priv->r600_max_pipes) & R7XX_MAX_PIPES_MASK);
 	cc_gc_shader_pipe_config |=
 		R600_INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << dev_priv->r600_max_simds) & R7XX_MAX_SIMDS_MASK);
 
-	cc_rb_backend_disable =
-		R600_BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << dev_priv->r600_max_backends) & R7XX_MAX_BACKENDS_MASK);
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV740)
+		backend_map = 0x28;
+	else
+		backend_map = r700_get_tile_pipe_to_backend_map(dev_priv,
+								dev_priv->r600_max_tile_pipes,
+								(R7XX_MAX_BACKENDS -
+								 r600_count_pipe_bits((cc_rb_backend_disable &
+										       R7XX_MAX_BACKENDS_MASK) >> 16)),
+								(cc_rb_backend_disable >> 16));
+	gb_tiling_config |= R600_BACKEND_MAP(backend_map);
 
 	RADEON_WRITE(R600_GB_TILING_CONFIG,      gb_tiling_config);
 	RADEON_WRITE(R600_DCP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
 	RADEON_WRITE(R600_HDP_TILING_CONFIG,    (gb_tiling_config & 0xffff));
+	if (gb_tiling_config & 0xc0) {
+		dev_priv->r600_group_size = 512;
+	} else {
+		dev_priv->r600_group_size = 256;
+	}
+	dev_priv->r600_npipes = 1 << ((gb_tiling_config >> 1) & 0x7);
+	if (gb_tiling_config & 0x30) {
+		dev_priv->r600_nbanks = 8;
+	} else {
+		dev_priv->r600_nbanks = 4;
+	}
 
 	RADEON_WRITE(R600_CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 	RADEON_WRITE(R600_CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
-	RADEON_WRITE(R600_GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
 
 	RADEON_WRITE(R700_CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
 	RADEON_WRITE(R700_CGTS_SYS_TCC_DISABLE, 0);
 	RADEON_WRITE(R700_CGTS_TCC_DISABLE, 0);
-	RADEON_WRITE(R700_CGTS_USER_SYS_TCC_DISABLE, 0);
-	RADEON_WRITE(R700_CGTS_USER_TCC_DISABLE, 0);
 
 	num_qd_pipes =
-		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK);
+		R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & R600_INACTIVE_QD_PIPES_MASK) >> 8);
 	RADEON_WRITE(R600_VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & R600_DEALLOC_DIST_MASK);
 	RADEON_WRITE(R600_VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & R600_VTX_REUSE_DEPTH_MASK);
 
@@ -1469,10 +1564,8 @@
 
 	RADEON_WRITE(R600_CP_MEQ_THRESHOLDS, R700_STQ_SPLIT(0x30));
 
-	RADEON_WRITE(R600_TA_CNTL_AUX, (R600_DISABLE_CUBE_ANISO |
-					R600_SYNC_GRADIENT |
-					R600_SYNC_WALKER |
-					R600_SYNC_ALIGNER));
+	ta_aux_cntl = RADEON_READ(R600_TA_CNTL_AUX);
+	RADEON_WRITE(R600_TA_CNTL_AUX, ta_aux_cntl | R600_DISABLE_CUBE_ANISO);
 
 	sx_debug_1 = RADEON_READ(R700_SX_DEBUG_1);
 	sx_debug_1 |= R700_ENABLE_NEW_SMX_ADDRESS;
@@ -1483,14 +1576,28 @@
 	smx_dc_ctl0 |= R700_CACHE_DEPTH((dev_priv->r700_sx_num_of_sets * 64) - 1);
 	RADEON_WRITE(R600_SMX_DC_CTL0, smx_dc_ctl0);
 
-	RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
-					  R700_GS_FLUSH_CTL(4) |
-					  R700_ACK_FLUSH_CTL(3) |
-					  R700_SYNC_FLUSH_CTL));
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV740)
+		RADEON_WRITE(R700_SMX_EVENT_CTL, (R700_ES_FLUSH_CTL(4) |
+						  R700_GS_FLUSH_CTL(4) |
+						  R700_ACK_FLUSH_CTL(3) |
+						  R700_SYNC_FLUSH_CTL));
 
-	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV770)
-		RADEON_WRITE(R700_DB_DEBUG3, R700_DB_CLK_OFF_DELAY(0x1f));
-	else {
+	db_debug3 = RADEON_READ(R700_DB_DEBUG3);
+	db_debug3 &= ~R700_DB_CLK_OFF_DELAY(0x1f);
+	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
+	case CHIP_RV770:
+	case CHIP_RV740:
+		db_debug3 |= R700_DB_CLK_OFF_DELAY(0x1f);
+		break;
+	case CHIP_RV710:
+	case CHIP_RV730:
+	default:
+		db_debug3 |= R700_DB_CLK_OFF_DELAY(2);
+		break;
+	}
+	RADEON_WRITE(R700_DB_DEBUG3, db_debug3);
+
+	if ((dev_priv->flags & RADEON_FAMILY_MASK) != CHIP_RV770) {
 		db_debug4 = RADEON_READ(RV700_DB_DEBUG4);
 		db_debug4 |= RV700_DISABLE_TILE_COVERED_FOR_PS_ITER;
 		RADEON_WRITE(RV700_DB_DEBUG4, db_debug4);
@@ -1519,10 +1626,10 @@
 			    R600_ALU_UPDATE_FIFO_HIWATER(0x8));
 	switch (dev_priv->flags & RADEON_FAMILY_MASK) {
 	case CHIP_RV770:
-		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
-		break;
 	case CHIP_RV730:
 	case CHIP_RV710:
+		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x1);
+		break;
 	case CHIP_RV740:
 	default:
 		sq_ms_fifo_sizes |= R600_FETCH_FIFO_HIWATER(0x4);
@@ -2529,3 +2636,12 @@
 	mutex_unlock(&dev_priv->cs_mutex);
 	return r;
 }
+
+void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size)
+{
+	struct drm_radeon_private *dev_priv = dev->dev_private;
+
+	*npipes = dev_priv->r600_npipes;
+	*nbanks = dev_priv->r600_nbanks;
+	*group_size = dev_priv->r600_group_size;
+}
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index e4c45ec..cd2c63b 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -28,6 +28,7 @@
 #include "drmP.h"
 #include "radeon.h"
 #include "r600d.h"
+#include "r600_reg_safe.h"
 
 static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
 					struct radeon_cs_reloc **cs_reloc);
@@ -35,11 +36,313 @@
 					struct radeon_cs_reloc **cs_reloc);
 typedef int (*next_reloc_t)(struct radeon_cs_parser*, struct radeon_cs_reloc**);
 static next_reloc_t r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_mm;
+extern void r600_cs_legacy_get_tiling_conf(struct drm_device *dev, u32 *npipes, u32 *nbanks, u32 *group_size);
+
 
 struct r600_cs_track {
-	u32	cb_color0_base_last;
+	/* configuration we miror so that we use same code btw kms/ums */
+	u32			group_size;
+	u32			nbanks;
+	u32			npipes;
+	/* value we track */
+	u32			nsamples;
+	u32			cb_color_base_last[8];
+	struct radeon_bo	*cb_color_bo[8];
+	u32			cb_color_bo_offset[8];
+	struct radeon_bo	*cb_color_frag_bo[8];
+	struct radeon_bo	*cb_color_tile_bo[8];
+	u32			cb_color_info[8];
+	u32			cb_color_size_idx[8];
+	u32			cb_target_mask;
+	u32			cb_shader_mask;
+	u32			cb_color_size[8];
+	u32			vgt_strmout_en;
+	u32			vgt_strmout_buffer_en;
+	u32			db_depth_control;
+	u32			db_depth_info;
+	u32			db_depth_size_idx;
+	u32			db_depth_view;
+	u32			db_depth_size;
+	u32			db_offset;
+	struct radeon_bo	*db_bo;
 };
 
+static inline int r600_bpe_from_format(u32 *bpe, u32 format)
+{
+	switch (format) {
+	case V_038004_COLOR_8:
+	case V_038004_COLOR_4_4:
+	case V_038004_COLOR_3_3_2:
+	case V_038004_FMT_1:
+		*bpe = 1;
+		break;
+	case V_038004_COLOR_16:
+	case V_038004_COLOR_16_FLOAT:
+	case V_038004_COLOR_8_8:
+	case V_038004_COLOR_5_6_5:
+	case V_038004_COLOR_6_5_5:
+	case V_038004_COLOR_1_5_5_5:
+	case V_038004_COLOR_4_4_4_4:
+	case V_038004_COLOR_5_5_5_1:
+		*bpe = 2;
+		break;
+	case V_038004_FMT_8_8_8:
+		*bpe = 3;
+		break;
+	case V_038004_COLOR_32:
+	case V_038004_COLOR_32_FLOAT:
+	case V_038004_COLOR_16_16:
+	case V_038004_COLOR_16_16_FLOAT:
+	case V_038004_COLOR_8_24:
+	case V_038004_COLOR_8_24_FLOAT:
+	case V_038004_COLOR_24_8:
+	case V_038004_COLOR_24_8_FLOAT:
+	case V_038004_COLOR_10_11_11:
+	case V_038004_COLOR_10_11_11_FLOAT:
+	case V_038004_COLOR_11_11_10:
+	case V_038004_COLOR_11_11_10_FLOAT:
+	case V_038004_COLOR_2_10_10_10:
+	case V_038004_COLOR_8_8_8_8:
+	case V_038004_COLOR_10_10_10_2:
+	case V_038004_FMT_5_9_9_9_SHAREDEXP:
+	case V_038004_FMT_32_AS_8:
+	case V_038004_FMT_32_AS_8_8:
+		*bpe = 4;
+		break;
+	case V_038004_COLOR_X24_8_32_FLOAT:
+	case V_038004_COLOR_32_32:
+	case V_038004_COLOR_32_32_FLOAT:
+	case V_038004_COLOR_16_16_16_16:
+	case V_038004_COLOR_16_16_16_16_FLOAT:
+		*bpe = 8;
+		break;
+	case V_038004_FMT_16_16_16:
+	case V_038004_FMT_16_16_16_FLOAT:
+		*bpe = 6;
+		break;
+	case V_038004_FMT_32_32_32:
+	case V_038004_FMT_32_32_32_FLOAT:
+		*bpe = 12;
+		break;
+	case V_038004_COLOR_32_32_32_32:
+	case V_038004_COLOR_32_32_32_32_FLOAT:
+		*bpe = 16;
+		break;
+	case V_038004_FMT_GB_GR:
+	case V_038004_FMT_BG_RG:
+	case V_038004_COLOR_INVALID:
+		*bpe = 16;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void r600_cs_track_init(struct r600_cs_track *track)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		track->cb_color_base_last[i] = 0;
+		track->cb_color_size[i] = 0;
+		track->cb_color_size_idx[i] = 0;
+		track->cb_color_info[i] = 0;
+		track->cb_color_bo[i] = NULL;
+		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
+	}
+	track->cb_target_mask = 0xFFFFFFFF;
+	track->cb_shader_mask = 0xFFFFFFFF;
+	track->db_bo = NULL;
+	/* assume the biggest format and that htile is enabled */
+	track->db_depth_info = 7 | (1 << 25);
+	track->db_depth_view = 0xFFFFC000;
+	track->db_depth_size = 0xFFFFFFFF;
+	track->db_depth_size_idx = 0;
+	track->db_depth_control = 0xFFFFFFFF;
+}
+
+static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+{
+	struct r600_cs_track *track = p->track;
+	u32 bpe = 0, pitch, slice_tile_max, size, tmp, height;
+	volatile u32 *ib = p->ib->ptr;
+
+	if (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
+		dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n");
+		return -EINVAL;
+	}
+	size = radeon_bo_size(track->cb_color_bo[i]);
+	if (r600_bpe_from_format(&bpe, G_0280A0_FORMAT(track->cb_color_info[i]))) {
+		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n",
+			 __func__, __LINE__, G_0280A0_FORMAT(track->cb_color_info[i]),
+			i, track->cb_color_info[i]);
+		return -EINVAL;
+	}
+	pitch = (G_028060_PITCH_TILE_MAX(track->cb_color_size[i]) + 1) << 3;
+	slice_tile_max = G_028060_SLICE_TILE_MAX(track->cb_color_size[i]) + 1;
+	if (!pitch) {
+		dev_warn(p->dev, "%s:%d cb pitch (%d) for %d invalid (0x%08X)\n",
+			__func__, __LINE__, pitch, i, track->cb_color_size[i]);
+		return -EINVAL;
+	}
+	height = size / (pitch * bpe);
+	if (height > 8192)
+		height = 8192;
+	switch (G_0280A0_ARRAY_MODE(track->cb_color_info[i])) {
+	case V_0280A0_ARRAY_LINEAR_GENERAL:
+	case V_0280A0_ARRAY_LINEAR_ALIGNED:
+		if (pitch & 0x3f) {
+			dev_warn(p->dev, "%s:%d cb pitch (%d x %d = %d) invalid\n",
+				__func__, __LINE__, pitch, bpe, pitch * bpe);
+			return -EINVAL;
+		}
+		if ((pitch * bpe) & (track->group_size - 1)) {
+			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+				__func__, __LINE__, pitch);
+			return -EINVAL;
+		}
+		break;
+	case V_0280A0_ARRAY_1D_TILED_THIN1:
+		if ((pitch * 8 * bpe * track->nsamples) & (track->group_size - 1)) {
+			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+				__func__, __LINE__, pitch);
+			return -EINVAL;
+		}
+		height &= ~0x7;
+		if (!height)
+			height = 8;
+		break;
+	case V_0280A0_ARRAY_2D_TILED_THIN1:
+		if (pitch & ((8 * track->nbanks) - 1)) {
+			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+				__func__, __LINE__, pitch);
+			return -EINVAL;
+		}
+		tmp = pitch * 8 * bpe * track->nsamples;
+		tmp = tmp / track->nbanks;
+		if (tmp & (track->group_size - 1)) {
+			dev_warn(p->dev, "%s:%d cb pitch (%d) invalid\n",
+				__func__, __LINE__, pitch);
+			return -EINVAL;
+		}
+		height &= ~((16 * track->npipes) - 1);
+		if (!height)
+			height = 16 * track->npipes;
+		break;
+	default:
+		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
+			G_0280A0_ARRAY_MODE(track->cb_color_info[i]), i,
+			track->cb_color_info[i]);
+		return -EINVAL;
+	}
+	/* check offset */
+	tmp = height * pitch;
+	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
+		dev_warn(p->dev, "%s offset[%d] %d to big\n", __func__, i, track->cb_color_bo_offset[i]);
+		return -EINVAL;
+	}
+	/* limit max tile */
+	tmp = (height * pitch) >> 6;
+	if (tmp < slice_tile_max)
+		slice_tile_max = tmp;
+	tmp = S_028060_PITCH_TILE_MAX((pitch >> 3) - 1) |
+		S_028060_SLICE_TILE_MAX(slice_tile_max - 1);
+	ib[track->cb_color_size_idx[i]] = tmp;
+	return 0;
+}
+
+static int r600_cs_track_check(struct radeon_cs_parser *p)
+{
+	struct r600_cs_track *track = p->track;
+	u32 tmp;
+	int r, i;
+	volatile u32 *ib = p->ib->ptr;
+
+	/* on legacy kernel we don't perform advanced check */
+	if (p->rdev == NULL)
+		return 0;
+	/* we don't support out buffer yet */
+	if (track->vgt_strmout_en || track->vgt_strmout_buffer_en) {
+		dev_warn(p->dev, "this kernel doesn't support SMX output buffer\n");
+		return -EINVAL;
+	}
+	/* check that we have a cb for each enabled target, we don't check
+	 * shader_mask because it seems mesa isn't always setting it :(
+	 */
+	tmp = track->cb_target_mask;
+	for (i = 0; i < 8; i++) {
+		if ((tmp >> (i * 4)) & 0xF) {
+			/* at least one component is enabled */
+			if (track->cb_color_bo[i] == NULL) {
+				dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
+					__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
+				return -EINVAL;
+			}
+			/* perform rewrite of CB_COLOR[0-7]_SIZE */
+			r = r600_cs_track_validate_cb(p, i);
+			if (r)
+				return r;
+		}
+	}
+	/* Check depth buffer */
+	if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
+		G_028800_Z_ENABLE(track->db_depth_control)) {
+		u32 nviews, bpe, ntiles;
+		if (track->db_bo == NULL) {
+			dev_warn(p->dev, "z/stencil with no depth buffer\n");
+			return -EINVAL;
+		}
+		if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
+			dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n");
+			return -EINVAL;
+		}
+		switch (G_028010_FORMAT(track->db_depth_info)) {
+		case V_028010_DEPTH_16:
+			bpe = 2;
+			break;
+		case V_028010_DEPTH_X8_24:
+		case V_028010_DEPTH_8_24:
+		case V_028010_DEPTH_X8_24_FLOAT:
+		case V_028010_DEPTH_8_24_FLOAT:
+		case V_028010_DEPTH_32_FLOAT:
+			bpe = 4;
+			break;
+		case V_028010_DEPTH_X24_8_32_FLOAT:
+			bpe = 8;
+			break;
+		default:
+			dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
+			return -EINVAL;
+		}
+		if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
+			if (!track->db_depth_size_idx) {
+				dev_warn(p->dev, "z/stencil buffer size not set\n");
+				return -EINVAL;
+			}
+			printk_once(KERN_WARNING "You have old & broken userspace please consider updating mesa\n");
+			tmp = radeon_bo_size(track->db_bo) - track->db_offset;
+			tmp = (tmp / bpe) >> 6;
+			if (!tmp) {
+				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
+						track->db_depth_size, bpe, track->db_offset,
+						radeon_bo_size(track->db_bo));
+				return -EINVAL;
+			}
+			ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
+		} else {
+			ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
+			nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
+			tmp = ntiles * bpe * 64 * nviews;
+			if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
+				dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %d -> %d have %ld)\n",
+						track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
+						radeon_bo_size(track->db_bo));
+				return -EINVAL;
+			}
+		}
+	}
+	return 0;
+}
+
 /**
  * r600_cs_packet_parse() - parse cp packet and point ib index to next packet
  * @parser:	parser structure holding parsing context.
@@ -359,6 +662,334 @@
 	return 0;
 }
 
+/**
+ * r600_cs_check_reg() - check if register is authorized or not
+ * @parser: parser structure holding parsing context
+ * @reg: register we are testing
+ * @idx: index into the cs buffer
+ *
+ * This function will test against r600_reg_safe_bm and return 0
+ * if register is safe. If register is not flag as safe this function
+ * will test it against a list of register needind special handling.
+ */
+static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+{
+	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
+	struct radeon_cs_reloc *reloc;
+	u32 last_reg = ARRAY_SIZE(r600_reg_safe_bm);
+	u32 m, i, tmp, *ib;
+	int r;
+
+	i = (reg >> 7);
+	if (i > last_reg) {
+		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
+		return -EINVAL;
+	}
+	m = 1 << ((reg >> 2) & 31);
+	if (!(r600_reg_safe_bm[i] & m))
+		return 0;
+	ib = p->ib->ptr;
+	switch (reg) {
+	/* force following reg to 0 in an attemp to disable out buffer
+	 * which will need us to better understand how it works to perform
+	 * security check on it (Jerome)
+	 */
+	case R_0288A8_SQ_ESGS_RING_ITEMSIZE:
+	case R_008C44_SQ_ESGS_RING_SIZE:
+	case R_0288B0_SQ_ESTMP_RING_ITEMSIZE:
+	case R_008C54_SQ_ESTMP_RING_SIZE:
+	case R_0288C0_SQ_FBUF_RING_ITEMSIZE:
+	case R_008C74_SQ_FBUF_RING_SIZE:
+	case R_0288B4_SQ_GSTMP_RING_ITEMSIZE:
+	case R_008C5C_SQ_GSTMP_RING_SIZE:
+	case R_0288AC_SQ_GSVS_RING_ITEMSIZE:
+	case R_008C4C_SQ_GSVS_RING_SIZE:
+	case R_0288BC_SQ_PSTMP_RING_ITEMSIZE:
+	case R_008C6C_SQ_PSTMP_RING_SIZE:
+	case R_0288C4_SQ_REDUC_RING_ITEMSIZE:
+	case R_008C7C_SQ_REDUC_RING_SIZE:
+	case R_0288B8_SQ_VSTMP_RING_ITEMSIZE:
+	case R_008C64_SQ_VSTMP_RING_SIZE:
+	case R_0288C8_SQ_GS_VERT_ITEMSIZE:
+		/* get value to populate the IB don't remove */
+		tmp =radeon_get_ib_value(p, idx);
+		ib[idx] = 0;
+		break;
+	case R_028800_DB_DEPTH_CONTROL:
+		track->db_depth_control = radeon_get_ib_value(p, idx);
+		break;
+	case R_028010_DB_DEPTH_INFO:
+		track->db_depth_info = radeon_get_ib_value(p, idx);
+		break;
+	case R_028004_DB_DEPTH_VIEW:
+		track->db_depth_view = radeon_get_ib_value(p, idx);
+		break;
+	case R_028000_DB_DEPTH_SIZE:
+		track->db_depth_size = radeon_get_ib_value(p, idx);
+		track->db_depth_size_idx = idx;
+		break;
+	case R_028AB0_VGT_STRMOUT_EN:
+		track->vgt_strmout_en = radeon_get_ib_value(p, idx);
+		break;
+	case R_028B20_VGT_STRMOUT_BUFFER_EN:
+		track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
+		break;
+	case R_028238_CB_TARGET_MASK:
+		track->cb_target_mask = radeon_get_ib_value(p, idx);
+		break;
+	case R_02823C_CB_SHADER_MASK:
+		track->cb_shader_mask = radeon_get_ib_value(p, idx);
+		break;
+	case R_028C04_PA_SC_AA_CONFIG:
+		tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx));
+		track->nsamples = 1 << tmp;
+		break;
+	case R_0280A0_CB_COLOR0_INFO:
+	case R_0280A4_CB_COLOR1_INFO:
+	case R_0280A8_CB_COLOR2_INFO:
+	case R_0280AC_CB_COLOR3_INFO:
+	case R_0280B0_CB_COLOR4_INFO:
+	case R_0280B4_CB_COLOR5_INFO:
+	case R_0280B8_CB_COLOR6_INFO:
+	case R_0280BC_CB_COLOR7_INFO:
+		tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
+		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
+		break;
+	case R_028060_CB_COLOR0_SIZE:
+	case R_028064_CB_COLOR1_SIZE:
+	case R_028068_CB_COLOR2_SIZE:
+	case R_02806C_CB_COLOR3_SIZE:
+	case R_028070_CB_COLOR4_SIZE:
+	case R_028074_CB_COLOR5_SIZE:
+	case R_028078_CB_COLOR6_SIZE:
+	case R_02807C_CB_COLOR7_SIZE:
+		tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4;
+		track->cb_color_size[tmp] = radeon_get_ib_value(p, idx);
+		track->cb_color_size_idx[tmp] = idx;
+		break;
+		/* This register were added late, there is userspace
+		 * which does provide relocation for those but set
+		 * 0 offset. In order to avoid breaking old userspace
+		 * we detect this and set address to point to last
+		 * CB_COLOR0_BASE, note that if userspace doesn't set
+		 * CB_COLOR0_BASE before this register we will report
+		 * error. Old userspace always set CB_COLOR0_BASE
+		 * before any of this.
+		 */
+	case R_0280E0_CB_COLOR0_FRAG:
+	case R_0280E4_CB_COLOR1_FRAG:
+	case R_0280E8_CB_COLOR2_FRAG:
+	case R_0280EC_CB_COLOR3_FRAG:
+	case R_0280F0_CB_COLOR4_FRAG:
+	case R_0280F4_CB_COLOR5_FRAG:
+	case R_0280F8_CB_COLOR6_FRAG:
+	case R_0280FC_CB_COLOR7_FRAG:
+		tmp = (reg - R_0280E0_CB_COLOR0_FRAG) / 4;
+		if (!r600_cs_packet_next_is_pkt3_nop(p)) {
+			if (!track->cb_color_base_last[tmp]) {
+				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
+				return -EINVAL;
+			}
+			ib[idx] = track->cb_color_base_last[tmp];
+			printk_once(KERN_WARNING "You have old & broken userspace "
+					"please consider updating mesa & xf86-video-ati\n");
+			track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp];
+		} else {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
+				return -EINVAL;
+			}
+			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			track->cb_color_frag_bo[tmp] = reloc->robj;
+		}
+		break;
+	case R_0280C0_CB_COLOR0_TILE:
+	case R_0280C4_CB_COLOR1_TILE:
+	case R_0280C8_CB_COLOR2_TILE:
+	case R_0280CC_CB_COLOR3_TILE:
+	case R_0280D0_CB_COLOR4_TILE:
+	case R_0280D4_CB_COLOR5_TILE:
+	case R_0280D8_CB_COLOR6_TILE:
+	case R_0280DC_CB_COLOR7_TILE:
+		tmp = (reg - R_0280C0_CB_COLOR0_TILE) / 4;
+		if (!r600_cs_packet_next_is_pkt3_nop(p)) {
+			if (!track->cb_color_base_last[tmp]) {
+				dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
+				return -EINVAL;
+			}
+			ib[idx] = track->cb_color_base_last[tmp];
+			printk_once(KERN_WARNING "You have old & broken userspace "
+					"please consider updating mesa & xf86-video-ati\n");
+			track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp];
+		} else {
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
+				return -EINVAL;
+			}
+			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			track->cb_color_tile_bo[tmp] = reloc->robj;
+		}
+		break;
+	case CB_COLOR0_BASE:
+	case CB_COLOR1_BASE:
+	case CB_COLOR2_BASE:
+	case CB_COLOR3_BASE:
+	case CB_COLOR4_BASE:
+	case CB_COLOR5_BASE:
+	case CB_COLOR6_BASE:
+	case CB_COLOR7_BASE:
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			dev_warn(p->dev, "bad SET_CONTEXT_REG "
+					"0x%04X\n", reg);
+			return -EINVAL;
+		}
+		tmp = (reg - CB_COLOR0_BASE) / 4;
+		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
+		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		track->cb_color_base_last[tmp] = ib[idx];
+		track->cb_color_bo[tmp] = reloc->robj;
+		break;
+	case DB_DEPTH_BASE:
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			dev_warn(p->dev, "bad SET_CONTEXT_REG "
+					"0x%04X\n", reg);
+			return -EINVAL;
+		}
+		track->db_offset = radeon_get_ib_value(p, idx);
+		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		track->db_bo = reloc->robj;
+		break;
+	case DB_HTILE_DATA_BASE:
+	case SQ_PGM_START_FS:
+	case SQ_PGM_START_ES:
+	case SQ_PGM_START_VS:
+	case SQ_PGM_START_GS:
+	case SQ_PGM_START_PS:
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			dev_warn(p->dev, "bad SET_CONTEXT_REG "
+					"0x%04X\n", reg);
+			return -EINVAL;
+		}
+		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		break;
+	default:
+		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline unsigned minify(unsigned size, unsigned levels)
+{
+	size = size >> levels;
+	if (size < 1)
+		size = 1;
+	return size;
+}
+
+static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned nlevels,
+				unsigned w0, unsigned h0, unsigned d0, unsigned bpe,
+				unsigned *l0_size, unsigned *mipmap_size)
+{
+	unsigned offset, i, level, face;
+	unsigned width, height, depth, rowstride, size;
+
+	w0 = minify(w0, 0);
+	h0 = minify(h0, 0);
+	d0 = minify(d0, 0);
+	for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) {
+		width = minify(w0, i);
+		height = minify(h0, i);
+		depth = minify(d0, i);
+		for(face = 0; face < nfaces; face++) {
+			rowstride = ((width * bpe) + 255) & ~255;
+			size = height * rowstride * depth;
+			offset += size;
+			offset = (offset + 0x1f) & ~0x1f;
+		}
+	}
+	*l0_size = (((w0 * bpe) + 255) & ~255) * h0 * d0;
+	*mipmap_size = offset;
+	if (!blevel)
+		*mipmap_size -= *l0_size;
+	if (!nlevels)
+		*mipmap_size = *l0_size;
+}
+
+/**
+ * r600_check_texture_resource() - check if register is authorized or not
+ * @p: parser structure holding parsing context
+ * @idx: index into the cs buffer
+ * @texture: texture's bo structure
+ * @mipmap: mipmap's bo structure
+ *
+ * This function will check that the resource has valid field and that
+ * the texture and mipmap bo object are big enough to cover this resource.
+ */
+static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+						struct radeon_bo *texture,
+						struct radeon_bo *mipmap)
+{
+	u32 nfaces, nlevels, blevel, w0, h0, d0, bpe = 0;
+	u32 word0, word1, l0_size, mipmap_size;
+
+	/* on legacy kernel we don't perform advanced check */
+	if (p->rdev == NULL)
+		return 0;
+	word0 = radeon_get_ib_value(p, idx + 0);
+	word1 = radeon_get_ib_value(p, idx + 1);
+	w0 = G_038000_TEX_WIDTH(word0) + 1;
+	h0 = G_038004_TEX_HEIGHT(word1) + 1;
+	d0 = G_038004_TEX_DEPTH(word1);
+	nfaces = 1;
+	switch (G_038000_DIM(word0)) {
+	case V_038000_SQ_TEX_DIM_1D:
+	case V_038000_SQ_TEX_DIM_2D:
+	case V_038000_SQ_TEX_DIM_3D:
+		break;
+	case V_038000_SQ_TEX_DIM_CUBEMAP:
+		nfaces = 6;
+		break;
+	case V_038000_SQ_TEX_DIM_1D_ARRAY:
+	case V_038000_SQ_TEX_DIM_2D_ARRAY:
+	case V_038000_SQ_TEX_DIM_2D_MSAA:
+	case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA:
+	default:
+		dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0));
+		return -EINVAL;
+	}
+	if (r600_bpe_from_format(&bpe,  G_038004_DATA_FORMAT(word1))) {
+		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
+			 __func__, __LINE__, G_038004_DATA_FORMAT(word1));
+		return -EINVAL;
+	}
+	word0 = radeon_get_ib_value(p, idx + 4);
+	word1 = radeon_get_ib_value(p, idx + 5);
+	blevel = G_038010_BASE_LEVEL(word0);
+	nlevels = G_038014_LAST_LEVEL(word1);
+	r600_texture_size(nfaces, blevel, nlevels, w0, h0, d0, bpe, &l0_size, &mipmap_size);
+	/* using get ib will give us the offset into the texture bo */
+	word0 = radeon_get_ib_value(p, idx + 2);
+	if ((l0_size + word0) > radeon_bo_size(texture)) {
+		dev_warn(p->dev, "texture bo too small (%d %d %d %d -> %d have %ld)\n",
+			w0, h0, bpe, word0, l0_size, radeon_bo_size(texture));
+		return -EINVAL;
+	}
+	/* using get ib will give us the offset into the mipmap bo */
+	word0 = radeon_get_ib_value(p, idx + 3);
+	if ((mipmap_size + word0) > radeon_bo_size(mipmap)) {
+		dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n",
+			w0, h0, bpe, blevel, nlevels, word0, mipmap_size, radeon_bo_size(texture));
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int r600_packet3_check(struct radeon_cs_parser *p,
 				struct radeon_cs_packet *pkt)
 {
@@ -408,12 +1039,22 @@
 		}
 		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
 		ib[idx+1] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
+		r = r600_cs_track_check(p);
+		if (r) {
+			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
+			return r;
+		}
 		break;
 	case PACKET3_DRAW_INDEX_AUTO:
 		if (pkt->count != 1) {
 			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
 			return -EINVAL;
 		}
+		r = r600_cs_track_check(p);
+		if (r) {
+			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
+			return r;
+		}
 		break;
 	case PACKET3_DRAW_INDEX_IMMD_BE:
 	case PACKET3_DRAW_INDEX_IMMD:
@@ -421,6 +1062,11 @@
 			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
 			return -EINVAL;
 		}
+		r = r600_cs_track_check(p);
+		if (r) {
+			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
+			return r;
+		}
 		break;
 	case PACKET3_WAIT_REG_MEM:
 		if (pkt->count != 5) {
@@ -493,30 +1139,9 @@
 		}
 		for (i = 0; i < pkt->count; i++) {
 			reg = start_reg + (4 * i);
-			switch (reg) {
-			case SQ_ESGS_RING_BASE:
-			case SQ_GSVS_RING_BASE:
-			case SQ_ESTMP_RING_BASE:
-			case SQ_GSTMP_RING_BASE:
-			case SQ_VSTMP_RING_BASE:
-			case SQ_PSTMP_RING_BASE:
-			case SQ_FBUF_RING_BASE:
-			case SQ_REDUC_RING_BASE:
-			case SX_MEMORY_EXPORT_BASE:
-				r = r600_cs_packet_next_reloc(p, &reloc);
-				if (r) {
-					DRM_ERROR("bad SET_CONFIG_REG "
-							"0x%04X\n", reg);
-					return -EINVAL;
-				}
-				ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				break;
-			case CP_COHER_BASE:
-				/* use PACKET3_SURFACE_SYNC */
-				return -EINVAL;
-			default:
-				break;
-			}
+			r = r600_cs_check_reg(p, reg, idx+1+i);
+			if (r)
+				return r;
 		}
 		break;
 	case PACKET3_SET_CONTEXT_REG:
@@ -530,106 +1155,9 @@
 		}
 		for (i = 0; i < pkt->count; i++) {
 			reg = start_reg + (4 * i);
-			switch (reg) {
-			/* This register were added late, there is userspace
-			 * which does provide relocation for those but set
-			 * 0 offset. In order to avoid breaking old userspace
-			 * we detect this and set address to point to last
-			 * CB_COLOR0_BASE, note that if userspace doesn't set
-			 * CB_COLOR0_BASE before this register we will report
-			 * error. Old userspace always set CB_COLOR0_BASE
-			 * before any of this.
-			 */
-			case R_0280E0_CB_COLOR0_FRAG:
-			case R_0280E4_CB_COLOR1_FRAG:
-			case R_0280E8_CB_COLOR2_FRAG:
-			case R_0280EC_CB_COLOR3_FRAG:
-			case R_0280F0_CB_COLOR4_FRAG:
-			case R_0280F4_CB_COLOR5_FRAG:
-			case R_0280F8_CB_COLOR6_FRAG:
-			case R_0280FC_CB_COLOR7_FRAG:
-			case R_0280C0_CB_COLOR0_TILE:
-			case R_0280C4_CB_COLOR1_TILE:
-			case R_0280C8_CB_COLOR2_TILE:
-			case R_0280CC_CB_COLOR3_TILE:
-			case R_0280D0_CB_COLOR4_TILE:
-			case R_0280D4_CB_COLOR5_TILE:
-			case R_0280D8_CB_COLOR6_TILE:
-			case R_0280DC_CB_COLOR7_TILE:
-				if (!r600_cs_packet_next_is_pkt3_nop(p)) {
-					if (!track->cb_color0_base_last) {
-						dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg);
-						return -EINVAL;
-					}
-					ib[idx+1+i] = track->cb_color0_base_last;
-					printk_once(KERN_WARNING "radeon: You have old & broken userspace "
-						"please consider updating mesa & xf86-video-ati\n");
-				} else {
-					r = r600_cs_packet_next_reloc(p, &reloc);
-					if (r) {
-						dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
-						return -EINVAL;
-					}
-					ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				}
-				break;
-			case DB_DEPTH_BASE:
-			case DB_HTILE_DATA_BASE:
-			case CB_COLOR0_BASE:
-				r = r600_cs_packet_next_reloc(p, &reloc);
-				if (r) {
-					DRM_ERROR("bad SET_CONTEXT_REG "
-							"0x%04X\n", reg);
-					return -EINVAL;
-				}
-				ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				track->cb_color0_base_last = ib[idx+1+i];
-				break;
-			case CB_COLOR1_BASE:
-			case CB_COLOR2_BASE:
-			case CB_COLOR3_BASE:
-			case CB_COLOR4_BASE:
-			case CB_COLOR5_BASE:
-			case CB_COLOR6_BASE:
-			case CB_COLOR7_BASE:
-			case SQ_PGM_START_FS:
-			case SQ_PGM_START_ES:
-			case SQ_PGM_START_VS:
-			case SQ_PGM_START_GS:
-			case SQ_PGM_START_PS:
-				r = r600_cs_packet_next_reloc(p, &reloc);
-				if (r) {
-					DRM_ERROR("bad SET_CONTEXT_REG "
-							"0x%04X\n", reg);
-					return -EINVAL;
-				}
-				ib[idx+1+i] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
-				break;
-			case VGT_DMA_BASE:
-			case VGT_DMA_BASE_HI:
-				/* These should be handled by DRAW_INDEX packet 3 */
-			case VGT_STRMOUT_BASE_OFFSET_0:
-			case VGT_STRMOUT_BASE_OFFSET_1:
-			case VGT_STRMOUT_BASE_OFFSET_2:
-			case VGT_STRMOUT_BASE_OFFSET_3:
-			case VGT_STRMOUT_BASE_OFFSET_HI_0:
-			case VGT_STRMOUT_BASE_OFFSET_HI_1:
-			case VGT_STRMOUT_BASE_OFFSET_HI_2:
-			case VGT_STRMOUT_BASE_OFFSET_HI_3:
-			case VGT_STRMOUT_BUFFER_BASE_0:
-			case VGT_STRMOUT_BUFFER_BASE_1:
-			case VGT_STRMOUT_BUFFER_BASE_2:
-			case VGT_STRMOUT_BUFFER_BASE_3:
-			case VGT_STRMOUT_BUFFER_OFFSET_0:
-			case VGT_STRMOUT_BUFFER_OFFSET_1:
-			case VGT_STRMOUT_BUFFER_OFFSET_2:
-			case VGT_STRMOUT_BUFFER_OFFSET_3:
-				/* These should be handled by STRMOUT_BUFFER packet 3 */
-				DRM_ERROR("bad context reg: 0x%08x\n", reg);
-				return -EINVAL;
-			default:
-				break;
-			}
+			r = r600_cs_check_reg(p, reg, idx+1+i);
+			if (r)
+				return r;
 		}
 		break;
 	case PACKET3_SET_RESOURCE:
@@ -646,6 +1174,9 @@
 			return -EINVAL;
 		}
 		for (i = 0; i < (pkt->count / 7); i++) {
+			struct radeon_bo *texture, *mipmap;
+			u32 size, offset;
+
 			switch (G__SQ_VTX_CONSTANT_TYPE(radeon_get_ib_value(p, idx+(i*7)+6+1))) {
 			case SQ_TEX_VTX_VALID_TEXTURE:
 				/* tex base */
@@ -655,6 +1186,7 @@
 					return -EINVAL;
 				}
 				ib[idx+1+(i*7)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				texture = reloc->robj;
 				/* tex mip base */
 				r = r600_cs_packet_next_reloc(p, &reloc);
 				if (r) {
@@ -662,6 +1194,11 @@
 					return -EINVAL;
 				}
 				ib[idx+1+(i*7)+3] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				mipmap = reloc->robj;
+				r = r600_check_texture_resource(p,  idx+(i*7)+1,
+						texture, mipmap);
+				if (r)
+					return r;
 				break;
 			case SQ_TEX_VTX_VALID_BUFFER:
 				/* vtx base */
@@ -670,6 +1207,13 @@
 					DRM_ERROR("bad SET_RESOURCE\n");
 					return -EINVAL;
 				}
+				offset = radeon_get_ib_value(p, idx+1+(i*7)+0);
+				size = radeon_get_ib_value(p, idx+1+(i*7)+1);
+				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
+					/* force size to size of the buffer */
+					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
+					ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj);
+				}
 				ib[idx+1+(i*7)+0] += (u32)((reloc->lobj.gpu_offset) & 0xffffffff);
 				ib[idx+1+(i*7)+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
 				break;
@@ -760,11 +1304,28 @@
 	struct r600_cs_track *track;
 	int r;
 
-	track = kzalloc(sizeof(*track), GFP_KERNEL);
-	p->track = track;
+	if (p->track == NULL) {
+		/* initialize tracker, we are in kms */
+		track = kzalloc(sizeof(*track), GFP_KERNEL);
+		if (track == NULL)
+			return -ENOMEM;
+		r600_cs_track_init(track);
+		if (p->rdev->family < CHIP_RV770) {
+			track->npipes = p->rdev->config.r600.tiling_npipes;
+			track->nbanks = p->rdev->config.r600.tiling_nbanks;
+			track->group_size = p->rdev->config.r600.tiling_group_size;
+		} else if (p->rdev->family <= CHIP_RV740) {
+			track->npipes = p->rdev->config.rv770.tiling_npipes;
+			track->nbanks = p->rdev->config.rv770.tiling_nbanks;
+			track->group_size = p->rdev->config.rv770.tiling_group_size;
+		}
+		p->track = track;
+	}
 	do {
 		r = r600_cs_packet_parse(p, &pkt, p->idx);
 		if (r) {
+			kfree(p->track);
+			p->track = NULL;
 			return r;
 		}
 		p->idx += pkt.count + 2;
@@ -779,9 +1340,13 @@
 			break;
 		default:
 			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+			kfree(p->track);
+			p->track = NULL;
 			return -EINVAL;
 		}
 		if (r) {
+			kfree(p->track);
+			p->track = NULL;
 			return r;
 		}
 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
@@ -791,6 +1356,8 @@
 		mdelay(1);
 	}
 #endif
+	kfree(p->track);
+	p->track = NULL;
 	return 0;
 }
 
@@ -833,9 +1400,16 @@
 {
 	struct radeon_cs_parser parser;
 	struct radeon_cs_chunk *ib_chunk;
-	struct radeon_ib	fake_ib;
+	struct radeon_ib fake_ib;
+	struct r600_cs_track *track;
 	int r;
 
+	/* initialize tracker */
+	track = kzalloc(sizeof(*track), GFP_KERNEL);
+	if (track == NULL)
+		return -ENOMEM;
+	r600_cs_track_init(track);
+	r600_cs_legacy_get_tiling_conf(dev, &track->npipes, &track->nbanks, &track->group_size);
 	/* initialize parser */
 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
 	parser.filp = filp;
@@ -843,6 +1417,7 @@
 	parser.rdev = NULL;
 	parser.family = family;
 	parser.ib = &fake_ib;
+	parser.track = track;
 	fake_ib.ptr = ib;
 	r = radeon_cs_parser_init(&parser, data);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 3048088..5b2e4d4 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -883,6 +883,16 @@
 
 #define R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL		0x5480
 
+#define R_028C04_PA_SC_AA_CONFIG                     0x028C04
+#define   S_028C04_MSAA_NUM_SAMPLES(x)                 (((x) & 0x3) << 0)
+#define   G_028C04_MSAA_NUM_SAMPLES(x)                 (((x) >> 0) & 0x3)
+#define   C_028C04_MSAA_NUM_SAMPLES                    0xFFFFFFFC
+#define   S_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) & 0x1) << 4)
+#define   G_028C04_AA_MASK_CENTROID_DTMN(x)            (((x) >> 4) & 0x1)
+#define   C_028C04_AA_MASK_CENTROID_DTMN               0xFFFFFFEF
+#define   S_028C04_MAX_SAMPLE_DIST(x)                  (((x) & 0xF) << 13)
+#define   G_028C04_MAX_SAMPLE_DIST(x)                  (((x) >> 13) & 0xF)
+#define   C_028C04_MAX_SAMPLE_DIST                     0xFFFE1FFF
 #define R_0280E0_CB_COLOR0_FRAG                      0x0280E0
 #define   S_0280E0_BASE_256B(x)                        (((x) & 0xFFFFFFFF) << 0)
 #define   G_0280E0_BASE_256B(x)                        (((x) >> 0) & 0xFFFFFFFF)
@@ -905,6 +915,461 @@
 #define R_0280D4_CB_COLOR5_TILE                      0x0280D4
 #define R_0280D8_CB_COLOR6_TILE                      0x0280D8
 #define R_0280DC_CB_COLOR7_TILE                      0x0280DC
-
+#define R_0280A0_CB_COLOR0_INFO                      0x0280A0
+#define   S_0280A0_ENDIAN(x)                           (((x) & 0x3) << 0)
+#define   G_0280A0_ENDIAN(x)                           (((x) >> 0) & 0x3)
+#define   C_0280A0_ENDIAN                              0xFFFFFFFC
+#define   S_0280A0_FORMAT(x)                           (((x) & 0x3F) << 2)
+#define   G_0280A0_FORMAT(x)                           (((x) >> 2) & 0x3F)
+#define   C_0280A0_FORMAT                              0xFFFFFF03
+#define     V_0280A0_COLOR_INVALID                     0x00000000
+#define     V_0280A0_COLOR_8                           0x00000001
+#define     V_0280A0_COLOR_4_4                         0x00000002
+#define     V_0280A0_COLOR_3_3_2                       0x00000003
+#define     V_0280A0_COLOR_16                          0x00000005
+#define     V_0280A0_COLOR_16_FLOAT                    0x00000006
+#define     V_0280A0_COLOR_8_8                         0x00000007
+#define     V_0280A0_COLOR_5_6_5                       0x00000008
+#define     V_0280A0_COLOR_6_5_5                       0x00000009
+#define     V_0280A0_COLOR_1_5_5_5                     0x0000000A
+#define     V_0280A0_COLOR_4_4_4_4                     0x0000000B
+#define     V_0280A0_COLOR_5_5_5_1                     0x0000000C
+#define     V_0280A0_COLOR_32                          0x0000000D
+#define     V_0280A0_COLOR_32_FLOAT                    0x0000000E
+#define     V_0280A0_COLOR_16_16                       0x0000000F
+#define     V_0280A0_COLOR_16_16_FLOAT                 0x00000010
+#define     V_0280A0_COLOR_8_24                        0x00000011
+#define     V_0280A0_COLOR_8_24_FLOAT                  0x00000012
+#define     V_0280A0_COLOR_24_8                        0x00000013
+#define     V_0280A0_COLOR_24_8_FLOAT                  0x00000014
+#define     V_0280A0_COLOR_10_11_11                    0x00000015
+#define     V_0280A0_COLOR_10_11_11_FLOAT              0x00000016
+#define     V_0280A0_COLOR_11_11_10                    0x00000017
+#define     V_0280A0_COLOR_11_11_10_FLOAT              0x00000018
+#define     V_0280A0_COLOR_2_10_10_10                  0x00000019
+#define     V_0280A0_COLOR_8_8_8_8                     0x0000001A
+#define     V_0280A0_COLOR_10_10_10_2                  0x0000001B
+#define     V_0280A0_COLOR_X24_8_32_FLOAT              0x0000001C
+#define     V_0280A0_COLOR_32_32                       0x0000001D
+#define     V_0280A0_COLOR_32_32_FLOAT                 0x0000001E
+#define     V_0280A0_COLOR_16_16_16_16                 0x0000001F
+#define     V_0280A0_COLOR_16_16_16_16_FLOAT           0x00000020
+#define     V_0280A0_COLOR_32_32_32_32                 0x00000022
+#define     V_0280A0_COLOR_32_32_32_32_FLOAT           0x00000023
+#define   S_0280A0_ARRAY_MODE(x)                       (((x) & 0xF) << 8)
+#define   G_0280A0_ARRAY_MODE(x)                       (((x) >> 8) & 0xF)
+#define   C_0280A0_ARRAY_MODE                          0xFFFFF0FF
+#define     V_0280A0_ARRAY_LINEAR_GENERAL              0x00000000
+#define     V_0280A0_ARRAY_LINEAR_ALIGNED              0x00000001
+#define     V_0280A0_ARRAY_1D_TILED_THIN1              0x00000002
+#define     V_0280A0_ARRAY_2D_TILED_THIN1              0x00000004
+#define   S_0280A0_NUMBER_TYPE(x)                      (((x) & 0x7) << 12)
+#define   G_0280A0_NUMBER_TYPE(x)                      (((x) >> 12) & 0x7)
+#define   C_0280A0_NUMBER_TYPE                         0xFFFF8FFF
+#define   S_0280A0_READ_SIZE(x)                        (((x) & 0x1) << 15)
+#define   G_0280A0_READ_SIZE(x)                        (((x) >> 15) & 0x1)
+#define   C_0280A0_READ_SIZE                           0xFFFF7FFF
+#define   S_0280A0_COMP_SWAP(x)                        (((x) & 0x3) << 16)
+#define   G_0280A0_COMP_SWAP(x)                        (((x) >> 16) & 0x3)
+#define   C_0280A0_COMP_SWAP                           0xFFFCFFFF
+#define   S_0280A0_TILE_MODE(x)                        (((x) & 0x3) << 18)
+#define   G_0280A0_TILE_MODE(x)                        (((x) >> 18) & 0x3)
+#define   C_0280A0_TILE_MODE                           0xFFF3FFFF
+#define   S_0280A0_BLEND_CLAMP(x)                      (((x) & 0x1) << 20)
+#define   G_0280A0_BLEND_CLAMP(x)                      (((x) >> 20) & 0x1)
+#define   C_0280A0_BLEND_CLAMP                         0xFFEFFFFF
+#define   S_0280A0_CLEAR_COLOR(x)                      (((x) & 0x1) << 21)
+#define   G_0280A0_CLEAR_COLOR(x)                      (((x) >> 21) & 0x1)
+#define   C_0280A0_CLEAR_COLOR                         0xFFDFFFFF
+#define   S_0280A0_BLEND_BYPASS(x)                     (((x) & 0x1) << 22)
+#define   G_0280A0_BLEND_BYPASS(x)                     (((x) >> 22) & 0x1)
+#define   C_0280A0_BLEND_BYPASS                        0xFFBFFFFF
+#define   S_0280A0_BLEND_FLOAT32(x)                    (((x) & 0x1) << 23)
+#define   G_0280A0_BLEND_FLOAT32(x)                    (((x) >> 23) & 0x1)
+#define   C_0280A0_BLEND_FLOAT32                       0xFF7FFFFF
+#define   S_0280A0_SIMPLE_FLOAT(x)                     (((x) & 0x1) << 24)
+#define   G_0280A0_SIMPLE_FLOAT(x)                     (((x) >> 24) & 0x1)
+#define   C_0280A0_SIMPLE_FLOAT                        0xFEFFFFFF
+#define   S_0280A0_ROUND_MODE(x)                       (((x) & 0x1) << 25)
+#define   G_0280A0_ROUND_MODE(x)                       (((x) >> 25) & 0x1)
+#define   C_0280A0_ROUND_MODE                          0xFDFFFFFF
+#define   S_0280A0_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
+#define   G_0280A0_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
+#define   C_0280A0_TILE_COMPACT                        0xFBFFFFFF
+#define   S_0280A0_SOURCE_FORMAT(x)                    (((x) & 0x1) << 27)
+#define   G_0280A0_SOURCE_FORMAT(x)                    (((x) >> 27) & 0x1)
+#define   C_0280A0_SOURCE_FORMAT                       0xF7FFFFFF
+#define R_0280A4_CB_COLOR1_INFO                      0x0280A4
+#define R_0280A8_CB_COLOR2_INFO                      0x0280A8
+#define R_0280AC_CB_COLOR3_INFO                      0x0280AC
+#define R_0280B0_CB_COLOR4_INFO                      0x0280B0
+#define R_0280B4_CB_COLOR5_INFO                      0x0280B4
+#define R_0280B8_CB_COLOR6_INFO                      0x0280B8
+#define R_0280BC_CB_COLOR7_INFO                      0x0280BC
+#define R_028060_CB_COLOR0_SIZE                      0x028060
+#define   S_028060_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
+#define   G_028060_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
+#define   C_028060_PITCH_TILE_MAX                      0xFFFFFC00
+#define   S_028060_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
+#define   G_028060_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
+#define   C_028060_SLICE_TILE_MAX                      0xC00003FF
+#define R_028064_CB_COLOR1_SIZE                      0x028064
+#define R_028068_CB_COLOR2_SIZE                      0x028068
+#define R_02806C_CB_COLOR3_SIZE                      0x02806C
+#define R_028070_CB_COLOR4_SIZE                      0x028070
+#define R_028074_CB_COLOR5_SIZE                      0x028074
+#define R_028078_CB_COLOR6_SIZE                      0x028078
+#define R_02807C_CB_COLOR7_SIZE                      0x02807C
+#define R_028238_CB_TARGET_MASK                      0x028238
+#define   S_028238_TARGET0_ENABLE(x)                   (((x) & 0xF) << 0)
+#define   G_028238_TARGET0_ENABLE(x)                   (((x) >> 0) & 0xF)
+#define   C_028238_TARGET0_ENABLE                      0xFFFFFFF0
+#define   S_028238_TARGET1_ENABLE(x)                   (((x) & 0xF) << 4)
+#define   G_028238_TARGET1_ENABLE(x)                   (((x) >> 4) & 0xF)
+#define   C_028238_TARGET1_ENABLE                      0xFFFFFF0F
+#define   S_028238_TARGET2_ENABLE(x)                   (((x) & 0xF) << 8)
+#define   G_028238_TARGET2_ENABLE(x)                   (((x) >> 8) & 0xF)
+#define   C_028238_TARGET2_ENABLE                      0xFFFFF0FF
+#define   S_028238_TARGET3_ENABLE(x)                   (((x) & 0xF) << 12)
+#define   G_028238_TARGET3_ENABLE(x)                   (((x) >> 12) & 0xF)
+#define   C_028238_TARGET3_ENABLE                      0xFFFF0FFF
+#define   S_028238_TARGET4_ENABLE(x)                   (((x) & 0xF) << 16)
+#define   G_028238_TARGET4_ENABLE(x)                   (((x) >> 16) & 0xF)
+#define   C_028238_TARGET4_ENABLE                      0xFFF0FFFF
+#define   S_028238_TARGET5_ENABLE(x)                   (((x) & 0xF) << 20)
+#define   G_028238_TARGET5_ENABLE(x)                   (((x) >> 20) & 0xF)
+#define   C_028238_TARGET5_ENABLE                      0xFF0FFFFF
+#define   S_028238_TARGET6_ENABLE(x)                   (((x) & 0xF) << 24)
+#define   G_028238_TARGET6_ENABLE(x)                   (((x) >> 24) & 0xF)
+#define   C_028238_TARGET6_ENABLE                      0xF0FFFFFF
+#define   S_028238_TARGET7_ENABLE(x)                   (((x) & 0xF) << 28)
+#define   G_028238_TARGET7_ENABLE(x)                   (((x) >> 28) & 0xF)
+#define   C_028238_TARGET7_ENABLE                      0x0FFFFFFF
+#define R_02823C_CB_SHADER_MASK                      0x02823C
+#define   S_02823C_OUTPUT0_ENABLE(x)                   (((x) & 0xF) << 0)
+#define   G_02823C_OUTPUT0_ENABLE(x)                   (((x) >> 0) & 0xF)
+#define   C_02823C_OUTPUT0_ENABLE                      0xFFFFFFF0
+#define   S_02823C_OUTPUT1_ENABLE(x)                   (((x) & 0xF) << 4)
+#define   G_02823C_OUTPUT1_ENABLE(x)                   (((x) >> 4) & 0xF)
+#define   C_02823C_OUTPUT1_ENABLE                      0xFFFFFF0F
+#define   S_02823C_OUTPUT2_ENABLE(x)                   (((x) & 0xF) << 8)
+#define   G_02823C_OUTPUT2_ENABLE(x)                   (((x) >> 8) & 0xF)
+#define   C_02823C_OUTPUT2_ENABLE                      0xFFFFF0FF
+#define   S_02823C_OUTPUT3_ENABLE(x)                   (((x) & 0xF) << 12)
+#define   G_02823C_OUTPUT3_ENABLE(x)                   (((x) >> 12) & 0xF)
+#define   C_02823C_OUTPUT3_ENABLE                      0xFFFF0FFF
+#define   S_02823C_OUTPUT4_ENABLE(x)                   (((x) & 0xF) << 16)
+#define   G_02823C_OUTPUT4_ENABLE(x)                   (((x) >> 16) & 0xF)
+#define   C_02823C_OUTPUT4_ENABLE                      0xFFF0FFFF
+#define   S_02823C_OUTPUT5_ENABLE(x)                   (((x) & 0xF) << 20)
+#define   G_02823C_OUTPUT5_ENABLE(x)                   (((x) >> 20) & 0xF)
+#define   C_02823C_OUTPUT5_ENABLE                      0xFF0FFFFF
+#define   S_02823C_OUTPUT6_ENABLE(x)                   (((x) & 0xF) << 24)
+#define   G_02823C_OUTPUT6_ENABLE(x)                   (((x) >> 24) & 0xF)
+#define   C_02823C_OUTPUT6_ENABLE                      0xF0FFFFFF
+#define   S_02823C_OUTPUT7_ENABLE(x)                   (((x) & 0xF) << 28)
+#define   G_02823C_OUTPUT7_ENABLE(x)                   (((x) >> 28) & 0xF)
+#define   C_02823C_OUTPUT7_ENABLE                      0x0FFFFFFF
+#define R_028AB0_VGT_STRMOUT_EN                      0x028AB0
+#define   S_028AB0_STREAMOUT(x)                        (((x) & 0x1) << 0)
+#define   G_028AB0_STREAMOUT(x)                        (((x) >> 0) & 0x1)
+#define   C_028AB0_STREAMOUT                           0xFFFFFFFE
+#define R_028B20_VGT_STRMOUT_BUFFER_EN               0x028B20
+#define   S_028B20_BUFFER_0_EN(x)                      (((x) & 0x1) << 0)
+#define   G_028B20_BUFFER_0_EN(x)                      (((x) >> 0) & 0x1)
+#define   C_028B20_BUFFER_0_EN                         0xFFFFFFFE
+#define   S_028B20_BUFFER_1_EN(x)                      (((x) & 0x1) << 1)
+#define   G_028B20_BUFFER_1_EN(x)                      (((x) >> 1) & 0x1)
+#define   C_028B20_BUFFER_1_EN                         0xFFFFFFFD
+#define   S_028B20_BUFFER_2_EN(x)                      (((x) & 0x1) << 2)
+#define   G_028B20_BUFFER_2_EN(x)                      (((x) >> 2) & 0x1)
+#define   C_028B20_BUFFER_2_EN                         0xFFFFFFFB
+#define   S_028B20_BUFFER_3_EN(x)                      (((x) & 0x1) << 3)
+#define   G_028B20_BUFFER_3_EN(x)                      (((x) >> 3) & 0x1)
+#define   C_028B20_BUFFER_3_EN                         0xFFFFFFF7
+#define   S_028B20_SIZE(x)                             (((x) & 0xFFFFFFFF) << 0)
+#define   G_028B20_SIZE(x)                             (((x) >> 0) & 0xFFFFFFFF)
+#define   C_028B20_SIZE                                0x00000000
+#define R_038000_SQ_TEX_RESOURCE_WORD0_0             0x038000
+#define   S_038000_DIM(x)                              (((x) & 0x7) << 0)
+#define   G_038000_DIM(x)                              (((x) >> 0) & 0x7)
+#define   C_038000_DIM                                 0xFFFFFFF8
+#define     V_038000_SQ_TEX_DIM_1D                     0x00000000
+#define     V_038000_SQ_TEX_DIM_2D                     0x00000001
+#define     V_038000_SQ_TEX_DIM_3D                     0x00000002
+#define     V_038000_SQ_TEX_DIM_CUBEMAP                0x00000003
+#define     V_038000_SQ_TEX_DIM_1D_ARRAY               0x00000004
+#define     V_038000_SQ_TEX_DIM_2D_ARRAY               0x00000005
+#define     V_038000_SQ_TEX_DIM_2D_MSAA                0x00000006
+#define     V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA          0x00000007
+#define   S_038000_TILE_MODE(x)                        (((x) & 0xF) << 3)
+#define   G_038000_TILE_MODE(x)                        (((x) >> 3) & 0xF)
+#define   C_038000_TILE_MODE                           0xFFFFFF87
+#define   S_038000_TILE_TYPE(x)                        (((x) & 0x1) << 7)
+#define   G_038000_TILE_TYPE(x)                        (((x) >> 7) & 0x1)
+#define   C_038000_TILE_TYPE                           0xFFFFFF7F
+#define   S_038000_PITCH(x)                            (((x) & 0x7FF) << 8)
+#define   G_038000_PITCH(x)                            (((x) >> 8) & 0x7FF)
+#define   C_038000_PITCH                               0xFFF800FF
+#define   S_038000_TEX_WIDTH(x)                        (((x) & 0x1FFF) << 19)
+#define   G_038000_TEX_WIDTH(x)                        (((x) >> 19) & 0x1FFF)
+#define   C_038000_TEX_WIDTH                           0x0007FFFF
+#define R_038004_SQ_TEX_RESOURCE_WORD1_0             0x038004
+#define   S_038004_TEX_HEIGHT(x)                       (((x) & 0x1FFF) << 0)
+#define   G_038004_TEX_HEIGHT(x)                       (((x) >> 0) & 0x1FFF)
+#define   C_038004_TEX_HEIGHT                          0xFFFFE000
+#define   S_038004_TEX_DEPTH(x)                        (((x) & 0x1FFF) << 13)
+#define   G_038004_TEX_DEPTH(x)                        (((x) >> 13) & 0x1FFF)
+#define   C_038004_TEX_DEPTH                           0xFC001FFF
+#define   S_038004_DATA_FORMAT(x)                      (((x) & 0x3F) << 26)
+#define   G_038004_DATA_FORMAT(x)                      (((x) >> 26) & 0x3F)
+#define   C_038004_DATA_FORMAT                         0x03FFFFFF
+#define     V_038004_COLOR_INVALID                     0x00000000
+#define     V_038004_COLOR_8                           0x00000001
+#define     V_038004_COLOR_4_4                         0x00000002
+#define     V_038004_COLOR_3_3_2                       0x00000003
+#define     V_038004_COLOR_16                          0x00000005
+#define     V_038004_COLOR_16_FLOAT                    0x00000006
+#define     V_038004_COLOR_8_8                         0x00000007
+#define     V_038004_COLOR_5_6_5                       0x00000008
+#define     V_038004_COLOR_6_5_5                       0x00000009
+#define     V_038004_COLOR_1_5_5_5                     0x0000000A
+#define     V_038004_COLOR_4_4_4_4                     0x0000000B
+#define     V_038004_COLOR_5_5_5_1                     0x0000000C
+#define     V_038004_COLOR_32                          0x0000000D
+#define     V_038004_COLOR_32_FLOAT                    0x0000000E
+#define     V_038004_COLOR_16_16                       0x0000000F
+#define     V_038004_COLOR_16_16_FLOAT                 0x00000010
+#define     V_038004_COLOR_8_24                        0x00000011
+#define     V_038004_COLOR_8_24_FLOAT                  0x00000012
+#define     V_038004_COLOR_24_8                        0x00000013
+#define     V_038004_COLOR_24_8_FLOAT                  0x00000014
+#define     V_038004_COLOR_10_11_11                    0x00000015
+#define     V_038004_COLOR_10_11_11_FLOAT              0x00000016
+#define     V_038004_COLOR_11_11_10                    0x00000017
+#define     V_038004_COLOR_11_11_10_FLOAT              0x00000018
+#define     V_038004_COLOR_2_10_10_10                  0x00000019
+#define     V_038004_COLOR_8_8_8_8                     0x0000001A
+#define     V_038004_COLOR_10_10_10_2                  0x0000001B
+#define     V_038004_COLOR_X24_8_32_FLOAT              0x0000001C
+#define     V_038004_COLOR_32_32                       0x0000001D
+#define     V_038004_COLOR_32_32_FLOAT                 0x0000001E
+#define     V_038004_COLOR_16_16_16_16                 0x0000001F
+#define     V_038004_COLOR_16_16_16_16_FLOAT           0x00000020
+#define     V_038004_COLOR_32_32_32_32                 0x00000022
+#define     V_038004_COLOR_32_32_32_32_FLOAT           0x00000023
+#define     V_038004_FMT_1                             0x00000025
+#define     V_038004_FMT_GB_GR                         0x00000027
+#define     V_038004_FMT_BG_RG                         0x00000028
+#define     V_038004_FMT_32_AS_8                       0x00000029
+#define     V_038004_FMT_32_AS_8_8                     0x0000002A
+#define     V_038004_FMT_5_9_9_9_SHAREDEXP             0x0000002B
+#define     V_038004_FMT_8_8_8                         0x0000002C
+#define     V_038004_FMT_16_16_16                      0x0000002D
+#define     V_038004_FMT_16_16_16_FLOAT                0x0000002E
+#define     V_038004_FMT_32_32_32                      0x0000002F
+#define     V_038004_FMT_32_32_32_FLOAT                0x00000030
+#define R_038010_SQ_TEX_RESOURCE_WORD4_0             0x038010
+#define   S_038010_FORMAT_COMP_X(x)                    (((x) & 0x3) << 0)
+#define   G_038010_FORMAT_COMP_X(x)                    (((x) >> 0) & 0x3)
+#define   C_038010_FORMAT_COMP_X                       0xFFFFFFFC
+#define   S_038010_FORMAT_COMP_Y(x)                    (((x) & 0x3) << 2)
+#define   G_038010_FORMAT_COMP_Y(x)                    (((x) >> 2) & 0x3)
+#define   C_038010_FORMAT_COMP_Y                       0xFFFFFFF3
+#define   S_038010_FORMAT_COMP_Z(x)                    (((x) & 0x3) << 4)
+#define   G_038010_FORMAT_COMP_Z(x)                    (((x) >> 4) & 0x3)
+#define   C_038010_FORMAT_COMP_Z                       0xFFFFFFCF
+#define   S_038010_FORMAT_COMP_W(x)                    (((x) & 0x3) << 6)
+#define   G_038010_FORMAT_COMP_W(x)                    (((x) >> 6) & 0x3)
+#define   C_038010_FORMAT_COMP_W                       0xFFFFFF3F
+#define   S_038010_NUM_FORMAT_ALL(x)                   (((x) & 0x3) << 8)
+#define   G_038010_NUM_FORMAT_ALL(x)                   (((x) >> 8) & 0x3)
+#define   C_038010_NUM_FORMAT_ALL                      0xFFFFFCFF
+#define   S_038010_SRF_MODE_ALL(x)                     (((x) & 0x1) << 10)
+#define   G_038010_SRF_MODE_ALL(x)                     (((x) >> 10) & 0x1)
+#define   C_038010_SRF_MODE_ALL                        0xFFFFFBFF
+#define   S_038010_FORCE_DEGAMMA(x)                    (((x) & 0x1) << 11)
+#define   G_038010_FORCE_DEGAMMA(x)                    (((x) >> 11) & 0x1)
+#define   C_038010_FORCE_DEGAMMA                       0xFFFFF7FF
+#define   S_038010_ENDIAN_SWAP(x)                      (((x) & 0x3) << 12)
+#define   G_038010_ENDIAN_SWAP(x)                      (((x) >> 12) & 0x3)
+#define   C_038010_ENDIAN_SWAP                         0xFFFFCFFF
+#define   S_038010_REQUEST_SIZE(x)                     (((x) & 0x3) << 14)
+#define   G_038010_REQUEST_SIZE(x)                     (((x) >> 14) & 0x3)
+#define   C_038010_REQUEST_SIZE                        0xFFFF3FFF
+#define   S_038010_DST_SEL_X(x)                        (((x) & 0x7) << 16)
+#define   G_038010_DST_SEL_X(x)                        (((x) >> 16) & 0x7)
+#define   C_038010_DST_SEL_X                           0xFFF8FFFF
+#define   S_038010_DST_SEL_Y(x)                        (((x) & 0x7) << 19)
+#define   G_038010_DST_SEL_Y(x)                        (((x) >> 19) & 0x7)
+#define   C_038010_DST_SEL_Y                           0xFFC7FFFF
+#define   S_038010_DST_SEL_Z(x)                        (((x) & 0x7) << 22)
+#define   G_038010_DST_SEL_Z(x)                        (((x) >> 22) & 0x7)
+#define   C_038010_DST_SEL_Z                           0xFE3FFFFF
+#define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
+#define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
+#define   C_038010_DST_SEL_W                           0xF1FFFFFF
+#define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
+#define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
+#define   C_038010_BASE_LEVEL                          0x0FFFFFFF
+#define R_038014_SQ_TEX_RESOURCE_WORD5_0             0x038014
+#define   S_038014_LAST_LEVEL(x)                       (((x) & 0xF) << 0)
+#define   G_038014_LAST_LEVEL(x)                       (((x) >> 0) & 0xF)
+#define   C_038014_LAST_LEVEL                          0xFFFFFFF0
+#define   S_038014_BASE_ARRAY(x)                       (((x) & 0x1FFF) << 4)
+#define   G_038014_BASE_ARRAY(x)                       (((x) >> 4) & 0x1FFF)
+#define   C_038014_BASE_ARRAY                          0xFFFE000F
+#define   S_038014_LAST_ARRAY(x)                       (((x) & 0x1FFF) << 17)
+#define   G_038014_LAST_ARRAY(x)                       (((x) >> 17) & 0x1FFF)
+#define   C_038014_LAST_ARRAY                          0xC001FFFF
+#define R_0288A8_SQ_ESGS_RING_ITEMSIZE               0x0288A8
+#define   S_0288A8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288A8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288A8_ITEMSIZE                            0xFFFF8000
+#define R_008C44_SQ_ESGS_RING_SIZE                   0x008C44
+#define   S_008C44_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C44_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C44_MEM_SIZE                            0x00000000
+#define R_0288B0_SQ_ESTMP_RING_ITEMSIZE              0x0288B0
+#define   S_0288B0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288B0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288B0_ITEMSIZE                            0xFFFF8000
+#define R_008C54_SQ_ESTMP_RING_SIZE                  0x008C54
+#define   S_008C54_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C54_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C54_MEM_SIZE                            0x00000000
+#define R_0288C0_SQ_FBUF_RING_ITEMSIZE               0x0288C0
+#define   S_0288C0_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288C0_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288C0_ITEMSIZE                            0xFFFF8000
+#define R_008C74_SQ_FBUF_RING_SIZE                   0x008C74
+#define   S_008C74_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C74_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C74_MEM_SIZE                            0x00000000
+#define R_0288B4_SQ_GSTMP_RING_ITEMSIZE              0x0288B4
+#define   S_0288B4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288B4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288B4_ITEMSIZE                            0xFFFF8000
+#define R_008C5C_SQ_GSTMP_RING_SIZE                  0x008C5C
+#define   S_008C5C_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C5C_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C5C_MEM_SIZE                            0x00000000
+#define R_0288AC_SQ_GSVS_RING_ITEMSIZE               0x0288AC
+#define   S_0288AC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288AC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288AC_ITEMSIZE                            0xFFFF8000
+#define R_008C4C_SQ_GSVS_RING_SIZE                   0x008C4C
+#define   S_008C4C_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C4C_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C4C_MEM_SIZE                            0x00000000
+#define R_0288BC_SQ_PSTMP_RING_ITEMSIZE              0x0288BC
+#define   S_0288BC_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288BC_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288BC_ITEMSIZE                            0xFFFF8000
+#define R_008C6C_SQ_PSTMP_RING_SIZE                  0x008C6C
+#define   S_008C6C_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C6C_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C6C_MEM_SIZE                            0x00000000
+#define R_0288C4_SQ_REDUC_RING_ITEMSIZE              0x0288C4
+#define   S_0288C4_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288C4_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288C4_ITEMSIZE                            0xFFFF8000
+#define R_008C7C_SQ_REDUC_RING_SIZE                  0x008C7C
+#define   S_008C7C_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C7C_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C7C_MEM_SIZE                            0x00000000
+#define R_0288B8_SQ_VSTMP_RING_ITEMSIZE              0x0288B8
+#define   S_0288B8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288B8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288B8_ITEMSIZE                            0xFFFF8000
+#define R_008C64_SQ_VSTMP_RING_SIZE                  0x008C64
+#define   S_008C64_MEM_SIZE(x)                         (((x) & 0xFFFFFFFF) << 0)
+#define   G_008C64_MEM_SIZE(x)                         (((x) >> 0) & 0xFFFFFFFF)
+#define   C_008C64_MEM_SIZE                            0x00000000
+#define R_0288C8_SQ_GS_VERT_ITEMSIZE                 0x0288C8
+#define   S_0288C8_ITEMSIZE(x)                         (((x) & 0x7FFF) << 0)
+#define   G_0288C8_ITEMSIZE(x)                         (((x) >> 0) & 0x7FFF)
+#define   C_0288C8_ITEMSIZE                            0xFFFF8000
+#define R_028010_DB_DEPTH_INFO                       0x028010
+#define   S_028010_FORMAT(x)                           (((x) & 0x7) << 0)
+#define   G_028010_FORMAT(x)                           (((x) >> 0) & 0x7)
+#define   C_028010_FORMAT                              0xFFFFFFF8
+#define     V_028010_DEPTH_INVALID                     0x00000000
+#define     V_028010_DEPTH_16                          0x00000001
+#define     V_028010_DEPTH_X8_24                       0x00000002
+#define     V_028010_DEPTH_8_24                        0x00000003
+#define     V_028010_DEPTH_X8_24_FLOAT                 0x00000004
+#define     V_028010_DEPTH_8_24_FLOAT                  0x00000005
+#define     V_028010_DEPTH_32_FLOAT                    0x00000006
+#define     V_028010_DEPTH_X24_8_32_FLOAT              0x00000007
+#define   S_028010_READ_SIZE(x)                        (((x) & 0x1) << 3)
+#define   G_028010_READ_SIZE(x)                        (((x) >> 3) & 0x1)
+#define   C_028010_READ_SIZE                           0xFFFFFFF7
+#define   S_028010_ARRAY_MODE(x)                       (((x) & 0xF) << 15)
+#define   G_028010_ARRAY_MODE(x)                       (((x) >> 15) & 0xF)
+#define   C_028010_ARRAY_MODE                          0xFFF87FFF
+#define   S_028010_TILE_SURFACE_ENABLE(x)              (((x) & 0x1) << 25)
+#define   G_028010_TILE_SURFACE_ENABLE(x)              (((x) >> 25) & 0x1)
+#define   C_028010_TILE_SURFACE_ENABLE                 0xFDFFFFFF
+#define   S_028010_TILE_COMPACT(x)                     (((x) & 0x1) << 26)
+#define   G_028010_TILE_COMPACT(x)                     (((x) >> 26) & 0x1)
+#define   C_028010_TILE_COMPACT                        0xFBFFFFFF
+#define   S_028010_ZRANGE_PRECISION(x)                 (((x) & 0x1) << 31)
+#define   G_028010_ZRANGE_PRECISION(x)                 (((x) >> 31) & 0x1)
+#define   C_028010_ZRANGE_PRECISION                    0x7FFFFFFF
+#define R_028000_DB_DEPTH_SIZE                       0x028000
+#define   S_028000_PITCH_TILE_MAX(x)                   (((x) & 0x3FF) << 0)
+#define   G_028000_PITCH_TILE_MAX(x)                   (((x) >> 0) & 0x3FF)
+#define   C_028000_PITCH_TILE_MAX                      0xFFFFFC00
+#define   S_028000_SLICE_TILE_MAX(x)                   (((x) & 0xFFFFF) << 10)
+#define   G_028000_SLICE_TILE_MAX(x)                   (((x) >> 10) & 0xFFFFF)
+#define   C_028000_SLICE_TILE_MAX                      0xC00003FF
+#define R_028004_DB_DEPTH_VIEW                       0x028004
+#define   S_028004_SLICE_START(x)                      (((x) & 0x7FF) << 0)
+#define   G_028004_SLICE_START(x)                      (((x) >> 0) & 0x7FF)
+#define   C_028004_SLICE_START                         0xFFFFF800
+#define   S_028004_SLICE_MAX(x)                        (((x) & 0x7FF) << 13)
+#define   G_028004_SLICE_MAX(x)                        (((x) >> 13) & 0x7FF)
+#define   C_028004_SLICE_MAX                           0xFF001FFF
+#define R_028800_DB_DEPTH_CONTROL                    0x028800
+#define   S_028800_STENCIL_ENABLE(x)                   (((x) & 0x1) << 0)
+#define   G_028800_STENCIL_ENABLE(x)                   (((x) >> 0) & 0x1)
+#define   C_028800_STENCIL_ENABLE                      0xFFFFFFFE
+#define   S_028800_Z_ENABLE(x)                         (((x) & 0x1) << 1)
+#define   G_028800_Z_ENABLE(x)                         (((x) >> 1) & 0x1)
+#define   C_028800_Z_ENABLE                            0xFFFFFFFD
+#define   S_028800_Z_WRITE_ENABLE(x)                   (((x) & 0x1) << 2)
+#define   G_028800_Z_WRITE_ENABLE(x)                   (((x) >> 2) & 0x1)
+#define   C_028800_Z_WRITE_ENABLE                      0xFFFFFFFB
+#define   S_028800_ZFUNC(x)                            (((x) & 0x7) << 4)
+#define   G_028800_ZFUNC(x)                            (((x) >> 4) & 0x7)
+#define   C_028800_ZFUNC                               0xFFFFFF8F
+#define   S_028800_BACKFACE_ENABLE(x)                  (((x) & 0x1) << 7)
+#define   G_028800_BACKFACE_ENABLE(x)                  (((x) >> 7) & 0x1)
+#define   C_028800_BACKFACE_ENABLE                     0xFFFFFF7F
+#define   S_028800_STENCILFUNC(x)                      (((x) & 0x7) << 8)
+#define   G_028800_STENCILFUNC(x)                      (((x) >> 8) & 0x7)
+#define   C_028800_STENCILFUNC                         0xFFFFF8FF
+#define   S_028800_STENCILFAIL(x)                      (((x) & 0x7) << 11)
+#define   G_028800_STENCILFAIL(x)                      (((x) >> 11) & 0x7)
+#define   C_028800_STENCILFAIL                         0xFFFFC7FF
+#define   S_028800_STENCILZPASS(x)                     (((x) & 0x7) << 14)
+#define   G_028800_STENCILZPASS(x)                     (((x) >> 14) & 0x7)
+#define   C_028800_STENCILZPASS                        0xFFFE3FFF
+#define   S_028800_STENCILZFAIL(x)                     (((x) & 0x7) << 17)
+#define   G_028800_STENCILZFAIL(x)                     (((x) >> 17) & 0x7)
+#define   C_028800_STENCILZFAIL                        0xFFF1FFFF
+#define   S_028800_STENCILFUNC_BF(x)                   (((x) & 0x7) << 20)
+#define   G_028800_STENCILFUNC_BF(x)                   (((x) >> 20) & 0x7)
+#define   C_028800_STENCILFUNC_BF                      0xFF8FFFFF
+#define   S_028800_STENCILFAIL_BF(x)                   (((x) & 0x7) << 23)
+#define   G_028800_STENCILFAIL_BF(x)                   (((x) >> 23) & 0x7)
+#define   C_028800_STENCILFAIL_BF                      0xFC7FFFFF
+#define   S_028800_STENCILZPASS_BF(x)                  (((x) & 0x7) << 26)
+#define   G_028800_STENCILZPASS_BF(x)                  (((x) >> 26) & 0x7)
+#define   C_028800_STENCILZPASS_BF                     0xE3FFFFFF
+#define   S_028800_STENCILZFAIL_BF(x)                  (((x) & 0x7) << 29)
+#define   G_028800_STENCILZFAIL_BF(x)                  (((x) >> 29) & 0x7)
+#define   C_028800_STENCILZFAIL_BF                     0x1FFFFFFF
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c0356bb..829e26e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -89,6 +89,7 @@
 extern int radeon_connector_table;
 extern int radeon_tv;
 extern int radeon_new_pll;
+extern int radeon_dynpm;
 extern int radeon_audio;
 
 /*
@@ -118,6 +119,21 @@
 /*
  * BIOS.
  */
+#define ATRM_BIOS_PAGE 4096
+
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_atrm_supported(struct pci_dev *pdev);
+int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len);
+#else
+static inline bool radeon_atrm_supported(struct pci_dev *pdev)
+{
+	return false;
+}
+
+static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){
+	return -EINVAL;
+}
+#endif
 bool radeon_get_bios(struct radeon_device *rdev);
 
 
@@ -138,17 +154,23 @@
 struct radeon_clock {
 	struct radeon_pll p1pll;
 	struct radeon_pll p2pll;
+	struct radeon_pll dcpll;
 	struct radeon_pll spll;
 	struct radeon_pll mpll;
 	/* 10 Khz units */
 	uint32_t default_mclk;
 	uint32_t default_sclk;
+	uint32_t default_dispclk;
+	uint32_t dp_extclk;
 };
 
 /*
  * Power management
  */
 int radeon_pm_init(struct radeon_device *rdev);
+void radeon_pm_compute_clocks(struct radeon_device *rdev);
+void radeon_combios_get_power_modes(struct radeon_device *rdev);
+void radeon_atombios_get_power_modes(struct radeon_device *rdev);
 
 /*
  * Fences.
@@ -275,6 +297,7 @@
 };
 
 #define RADEON_GPU_PAGE_SIZE 4096
+#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
 
 struct radeon_gart {
 	dma_addr_t			table_addr;
@@ -309,21 +332,19 @@
 	/* for some chips with <= 32MB we need to lie
 	 * about vram size near mc fb location */
 	u64			mc_vram_size;
-	u64			gtt_location;
+	u64			visible_vram_size;
 	u64			gtt_size;
 	u64			gtt_start;
 	u64			gtt_end;
-	u64			vram_location;
 	u64			vram_start;
 	u64			vram_end;
 	unsigned		vram_width;
 	u64			real_vram_size;
 	int			vram_mtrr;
 	bool			vram_is_ddr;
-	bool                    igp_sideport_enabled;
+	bool			igp_sideport_enabled;
 };
 
-int radeon_mc_setup(struct radeon_device *rdev);
 bool radeon_combios_sideport_present(struct radeon_device *rdev);
 bool radeon_atombios_sideport_present(struct radeon_device *rdev);
 
@@ -348,6 +369,7 @@
 	bool		sw_int;
 	/* FIXME: use a define max crtc rather than hardcode it */
 	bool		crtc_vblank_int[2];
+	wait_queue_head_t	vblank_queue;
 	/* FIXME: use defines for max hpd/dacs */
 	bool            hpd[6];
 	spinlock_t sw_lock;
@@ -379,6 +401,7 @@
 struct radeon_ib_pool {
 	struct mutex		mutex;
 	struct radeon_bo	*robj;
+	struct list_head	bogus_ib;
 	struct radeon_ib	ibs[RADEON_IB_POOL_SIZE];
 	bool			ready;
 	unsigned		head_id;
@@ -433,6 +456,7 @@
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_test(struct radeon_device *rdev);
+extern void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib);
 /* Ring access between begin & end cannot sleep */
 void radeon_ring_free_size(struct radeon_device *rdev);
 int radeon_ring_lock(struct radeon_device *rdev, unsigned ndw);
@@ -570,7 +594,99 @@
  * Equation between gpu/memory clock and available bandwidth is hw dependent
  * (type of memory, bus size, efficiency, ...)
  */
+enum radeon_pm_state {
+	PM_STATE_DISABLED,
+	PM_STATE_MINIMUM,
+	PM_STATE_PAUSED,
+	PM_STATE_ACTIVE
+};
+enum radeon_pm_action {
+	PM_ACTION_NONE,
+	PM_ACTION_MINIMUM,
+	PM_ACTION_DOWNCLOCK,
+	PM_ACTION_UPCLOCK
+};
+
+enum radeon_voltage_type {
+	VOLTAGE_NONE = 0,
+	VOLTAGE_GPIO,
+	VOLTAGE_VDDC,
+	VOLTAGE_SW
+};
+
+enum radeon_pm_state_type {
+	POWER_STATE_TYPE_DEFAULT,
+	POWER_STATE_TYPE_POWERSAVE,
+	POWER_STATE_TYPE_BATTERY,
+	POWER_STATE_TYPE_BALANCED,
+	POWER_STATE_TYPE_PERFORMANCE,
+};
+
+enum radeon_pm_clock_mode_type {
+	POWER_MODE_TYPE_DEFAULT,
+	POWER_MODE_TYPE_LOW,
+	POWER_MODE_TYPE_MID,
+	POWER_MODE_TYPE_HIGH,
+};
+
+struct radeon_voltage {
+	enum radeon_voltage_type type;
+	/* gpio voltage */
+	struct radeon_gpio_rec gpio;
+	u32 delay; /* delay in usec from voltage drop to sclk change */
+	bool active_high; /* voltage drop is active when bit is high */
+	/* VDDC voltage */
+	u8 vddc_id; /* index into vddc voltage table */
+	u8 vddci_id; /* index into vddci voltage table */
+	bool vddci_enabled;
+	/* r6xx+ sw */
+	u32 voltage;
+};
+
+struct radeon_pm_non_clock_info {
+	/* pcie lanes */
+	int pcie_lanes;
+	/* standardized non-clock flags */
+	u32 flags;
+};
+
+struct radeon_pm_clock_info {
+	/* memory clock */
+	u32 mclk;
+	/* engine clock */
+	u32 sclk;
+	/* voltage info */
+	struct radeon_voltage voltage;
+	/* standardized clock flags - not sure we'll need these */
+	u32 flags;
+};
+
+struct radeon_power_state {
+	enum radeon_pm_state_type type;
+	/* XXX: use a define for num clock modes */
+	struct radeon_pm_clock_info clock_info[8];
+	/* number of valid clock modes in this power state */
+	int num_clock_modes;
+	struct radeon_pm_clock_info *default_clock_mode;
+	/* non clock info about this state */
+	struct radeon_pm_non_clock_info non_clock_info;
+	bool voltage_drop_active;
+};
+
+/*
+ * Some modes are overclocked by very low value, accept them
+ */
+#define RADEON_MODE_OVERCLOCK_MARGIN 500 /* 5 MHz */
+
 struct radeon_pm {
+	struct mutex		mutex;
+	struct delayed_work	idle_work;
+	enum radeon_pm_state	state;
+	enum radeon_pm_action	planned_action;
+	unsigned long		action_timeout;
+	bool 			downclocked;
+	int			active_crtcs;
+	int			req_vblank;
 	fixed20_12		max_bandwidth;
 	fixed20_12		igp_sideport_mclk;
 	fixed20_12		igp_system_mclk;
@@ -582,6 +698,15 @@
 	fixed20_12		core_bandwidth;
 	fixed20_12		sclk;
 	fixed20_12		needed_bandwidth;
+	/* XXX: use a define for num power modes */
+	struct radeon_power_state power_state[8];
+	/* number of valid power states */
+	int                     num_power_states;
+	struct radeon_power_state *current_power_state;
+	struct radeon_pm_clock_info *current_clock_mode;
+	struct radeon_power_state *requested_power_state;
+	struct radeon_pm_clock_info *requested_clock_mode;
+	struct radeon_power_state *default_power_state;
 };
 
 
@@ -651,6 +776,7 @@
 	void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
 	uint32_t (*get_memory_clock)(struct radeon_device *rdev);
 	void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock);
+	int (*get_pcie_lanes)(struct radeon_device *rdev);
 	void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
 	void (*set_clock_gating)(struct radeon_device *rdev, int enable);
 	int (*set_surface_reg)(struct radeon_device *rdev, int reg,
@@ -701,6 +827,9 @@
 	unsigned sx_max_export_pos_size;
 	unsigned sx_max_export_smx_size;
 	unsigned sq_num_cf_insts;
+	unsigned tiling_nbanks;
+	unsigned tiling_npipes;
+	unsigned tiling_group_size;
 };
 
 struct rv770_asic {
@@ -721,6 +850,9 @@
 	unsigned sc_prim_fifo_size;
 	unsigned sc_hiz_tile_fifo_size;
 	unsigned sc_earlyz_tile_fifo_fize;
+	unsigned tiling_nbanks;
+	unsigned tiling_npipes;
+	unsigned tiling_group_size;
 };
 
 union radeon_asic_config {
@@ -830,6 +962,8 @@
 	struct r600_ih ih; /* r6/700 interrupt ring */
 	struct workqueue_struct *wq;
 	struct work_struct hotplug_work;
+	int num_crtc; /* number of crtcs */
+	struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
 
 	/* audio stuff */
 	struct timer_list	audio_timer;
@@ -838,6 +972,8 @@
 	int			audio_bits_per_sample;
 	uint8_t			audio_status_bits;
 	uint8_t			audio_category_code;
+
+	bool powered_down;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -895,6 +1031,8 @@
 #define WREG32_MC(reg, v) rdev->mc_wreg(rdev, (reg), (v))
 #define RREG32_PCIE(reg) rv370_pcie_rreg(rdev, (reg))
 #define WREG32_PCIE(reg, v) rv370_pcie_wreg(rdev, (reg), (v))
+#define RREG32_PCIE_P(reg) rdev->pciep_rreg(rdev, (reg))
+#define WREG32_PCIE_P(reg, v) rdev->pciep_wreg(rdev, (reg), (v))
 #define WREG32_P(reg, val, mask)				\
 	do {							\
 		uint32_t tmp_ = RREG32(reg);			\
@@ -956,7 +1094,7 @@
 #define ASIC_IS_AVIVO(rdev) ((rdev->family >= CHIP_RS600))
 #define ASIC_IS_DCE3(rdev) ((rdev->family >= CHIP_RV620))
 #define ASIC_IS_DCE32(rdev) ((rdev->family >= CHIP_RV730))
-
+#define ASIC_IS_DCE4(rdev) ((rdev->family >= CHIP_CEDAR))
 
 /*
  * BIOS helpers.
@@ -1015,6 +1153,7 @@
 #define radeon_set_engine_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e))
 #define radeon_get_memory_clock(rdev) (rdev)->asic->get_memory_clock((rdev))
 #define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_memory_clock((rdev), (e))
+#define radeon_get_pcie_lanes(rdev) (rdev)->asic->get_pcie_lanes((rdev))
 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l))
 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e))
 #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
@@ -1029,6 +1168,7 @@
 /* AGP */
 extern void radeon_agp_disable(struct radeon_device *rdev);
 extern int radeon_gart_table_vram_pin(struct radeon_device *rdev);
+extern void radeon_gart_restore(struct radeon_device *rdev);
 extern int radeon_modeset_init(struct radeon_device *rdev);
 extern void radeon_modeset_fini(struct radeon_device *rdev);
 extern bool radeon_card_posted(struct radeon_device *rdev);
@@ -1042,6 +1182,10 @@
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
+extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
+extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
+extern int radeon_resume_kms(struct drm_device *dev);
+extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
 
 /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */
 struct r100_mc_save {
@@ -1096,7 +1240,7 @@
 /* r300,r350,rv350,rv370,rv380 */
 extern void r300_set_reg_safe(struct radeon_device *rdev);
 extern void r300_mc_program(struct radeon_device *rdev);
-extern void r300_vram_info(struct radeon_device *rdev);
+extern void r300_mc_init(struct radeon_device *rdev);
 extern void r300_clock_startup(struct radeon_device *rdev);
 extern int r300_mc_wait_for_idle(struct radeon_device *rdev);
 extern int rv370_pcie_gart_init(struct radeon_device *rdev);
@@ -1105,7 +1249,6 @@
 extern void rv370_pcie_gart_disable(struct radeon_device *rdev);
 
 /* r420,r423,rv410 */
-extern int r420_mc_init(struct radeon_device *rdev);
 extern u32 r420_mc_rreg(struct radeon_device *rdev, u32 reg);
 extern void r420_mc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 extern int r420_debugfs_pipes_info_init(struct radeon_device *rdev);
@@ -1147,13 +1290,13 @@
 					struct drm_display_mode *mode2);
 
 /* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
+extern void r600_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern bool r600_card_posted(struct radeon_device *rdev);
 extern void r600_cp_stop(struct radeon_device *rdev);
 extern void r600_ring_init(struct radeon_device *rdev, unsigned ring_size);
 extern int r600_cp_resume(struct radeon_device *rdev);
 extern void r600_cp_fini(struct radeon_device *rdev);
 extern int r600_count_pipe_bits(uint32_t val);
-extern int r600_gart_clear_page(struct radeon_device *rdev, int i);
 extern int r600_mc_wait_for_idle(struct radeon_device *rdev);
 extern int r600_pcie_gart_init(struct radeon_device *rdev);
 extern void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
@@ -1189,6 +1332,14 @@
 					    uint8_t status_bits,
 					    uint8_t category_code);
 
+/* evergreen */
+struct evergreen_mc_save {
+	u32 vga_control[6];
+	u32 vga_render_control;
+	u32 vga_hdp_control;
+	u32 crtc_control[6];
+};
+
 #include "radeon_object.h"
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index c0681a55..c445779 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -237,6 +237,10 @@
 
 	rdev->mc.agp_base = rdev->ddev->agp->agp_info.aper_base;
 	rdev->mc.gtt_size = rdev->ddev->agp->agp_info.aper_size << 20;
+	rdev->mc.gtt_start = rdev->mc.agp_base;
+	rdev->mc.gtt_end = rdev->mc.gtt_start + rdev->mc.gtt_size - 1;
+	dev_info(rdev->dev, "GTT: %lluM 0x%08llX - 0x%08llX\n",
+		rdev->mc.gtt_size >> 20, rdev->mc.gtt_start, rdev->mc.gtt_end);
 
 	/* workaround some hw issues */
 	if (rdev->family < CHIP_R200) {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 05ee1ae..d3a157b 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -43,7 +43,7 @@
 void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 
 /*
- * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ * r100,rv100,rs100,rv200,rs200
  */
 extern int r100_init(struct radeon_device *rdev);
 extern void r100_fini(struct radeon_device *rdev);
@@ -108,6 +108,52 @@
 	.set_engine_clock = &radeon_legacy_set_engine_clock,
 	.get_memory_clock = &radeon_legacy_get_memory_clock,
 	.set_memory_clock = NULL,
+	.get_pcie_lanes = NULL,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
+	.hpd_init = &r100_hpd_init,
+	.hpd_fini = &r100_hpd_fini,
+	.hpd_sense = &r100_hpd_sense,
+	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
+};
+
+/*
+ * r200,rv250,rs300,rv280
+ */
+extern int r200_copy_dma(struct radeon_device *rdev,
+			uint64_t src_offset,
+			uint64_t dst_offset,
+			unsigned num_pages,
+			struct radeon_fence *fence);
+static struct radeon_asic r200_asic = {
+	.init = &r100_init,
+	.fini = &r100_fini,
+	.suspend = &r100_suspend,
+	.resume = &r100_resume,
+	.vga_set_state = &r100_vga_set_state,
+	.gpu_reset = &r100_gpu_reset,
+	.gart_tlb_flush = &r100_pci_gart_tlb_flush,
+	.gart_set_page = &r100_pci_gart_set_page,
+	.cp_commit = &r100_cp_commit,
+	.ring_start = &r100_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.get_vblank_counter = &r100_get_vblank_counter,
+	.fence_ring_emit = &r100_fence_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r200_copy_dma,
+	.copy = &r100_copy_blit,
+	.get_engine_clock = &radeon_legacy_get_engine_clock,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.get_memory_clock = &radeon_legacy_get_memory_clock,
+	.set_memory_clock = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -138,11 +184,8 @@
 extern uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
 extern void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
-extern int r300_copy_dma(struct radeon_device *rdev,
-			uint64_t src_offset,
-			uint64_t dst_offset,
-			unsigned num_pages,
-			struct radeon_fence *fence);
+extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.fini = &r300_fini,
@@ -162,7 +205,46 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
+	.copy = &r100_copy_blit,
+	.get_engine_clock = &radeon_legacy_get_engine_clock,
+	.set_engine_clock = &radeon_legacy_set_engine_clock,
+	.get_memory_clock = &radeon_legacy_get_memory_clock,
+	.set_memory_clock = NULL,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
+	.set_pcie_lanes = &rv370_set_pcie_lanes,
+	.set_clock_gating = &radeon_legacy_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
+	.bandwidth_update = &r100_bandwidth_update,
+	.hpd_init = &r100_hpd_init,
+	.hpd_fini = &r100_hpd_fini,
+	.hpd_sense = &r100_hpd_sense,
+	.hpd_set_polarity = &r100_hpd_set_polarity,
+	.ioctl_wait_idle = NULL,
+};
+
+
+static struct radeon_asic r300_asic_pcie = {
+	.init = &r300_init,
+	.fini = &r300_fini,
+	.suspend = &r300_suspend,
+	.resume = &r300_resume,
+	.vga_set_state = &r100_vga_set_state,
+	.gpu_reset = &r300_gpu_reset,
+	.gart_tlb_flush = &rv370_pcie_gart_tlb_flush,
+	.gart_set_page = &rv370_pcie_gart_set_page,
+	.cp_commit = &r100_cp_commit,
+	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ring_ib_execute = &r100_ring_ib_execute,
+	.irq_set = &r100_irq_set,
+	.irq_process = &r100_irq_process,
+	.get_vblank_counter = &r100_get_vblank_counter,
+	.fence_ring_emit = &r300_fence_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.copy_blit = &r100_copy_blit,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_legacy_get_engine_clock,
 	.set_engine_clock = &radeon_legacy_set_engine_clock,
@@ -206,12 +288,13 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -255,12 +338,13 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_legacy_get_engine_clock,
 	.set_engine_clock = &radeon_legacy_set_engine_clock,
 	.get_memory_clock = &radeon_legacy_get_memory_clock,
 	.set_memory_clock = NULL,
+	.get_pcie_lanes = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_legacy_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -314,14 +398,17 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_surface_reg = r100_set_surface_reg,
+	.clear_surface_reg = r100_clear_surface_reg,
 	.bandwidth_update = &rs600_bandwidth_update,
 	.hpd_init = &rs600_hpd_init,
 	.hpd_fini = &rs600_hpd_fini,
@@ -360,12 +447,13 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
-	.copy = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
+	.copy = &r200_copy_dma,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = NULL,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -412,12 +500,13 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -455,12 +544,13 @@
 	.fence_ring_emit = &r300_fence_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
-	.copy_dma = &r300_copy_dma,
+	.copy_dma = &r200_copy_dma,
 	.copy = &r100_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
 	.set_pcie_lanes = &rv370_set_pcie_lanes,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.set_surface_reg = r100_set_surface_reg,
@@ -538,8 +628,9 @@
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
 	.set_pcie_lanes = NULL,
-	.set_clock_gating = &radeon_atom_set_clock_gating,
+	.set_clock_gating = NULL,
 	.set_surface_reg = r600_set_surface_reg,
 	.clear_surface_reg = r600_clear_surface_reg,
 	.bandwidth_update = &rv515_bandwidth_update,
@@ -583,6 +674,7 @@
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
 	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.get_pcie_lanes = &rv370_get_pcie_lanes,
 	.set_pcie_lanes = NULL,
 	.set_clock_gating = &radeon_atom_set_clock_gating,
 	.set_surface_reg = r600_set_surface_reg,
@@ -595,4 +687,54 @@
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
 };
 
+/*
+ * evergreen
+ */
+int evergreen_init(struct radeon_device *rdev);
+void evergreen_fini(struct radeon_device *rdev);
+int evergreen_suspend(struct radeon_device *rdev);
+int evergreen_resume(struct radeon_device *rdev);
+int evergreen_gpu_reset(struct radeon_device *rdev);
+void evergreen_bandwidth_update(struct radeon_device *rdev);
+void evergreen_hpd_init(struct radeon_device *rdev);
+void evergreen_hpd_fini(struct radeon_device *rdev);
+bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
+void evergreen_hpd_set_polarity(struct radeon_device *rdev,
+				enum radeon_hpd_id hpd);
+
+static struct radeon_asic evergreen_asic = {
+	.init = &evergreen_init,
+	.fini = &evergreen_fini,
+	.suspend = &evergreen_suspend,
+	.resume = &evergreen_resume,
+	.cp_commit = NULL,
+	.gpu_reset = &evergreen_gpu_reset,
+	.vga_set_state = &r600_vga_set_state,
+	.gart_tlb_flush = &r600_pcie_gart_tlb_flush,
+	.gart_set_page = &rs600_gart_set_page,
+	.ring_test = NULL,
+	.ring_ib_execute = NULL,
+	.irq_set = NULL,
+	.irq_process = NULL,
+	.get_vblank_counter = NULL,
+	.fence_ring_emit = NULL,
+	.cs_parse = NULL,
+	.copy_blit = NULL,
+	.copy_dma = NULL,
+	.copy = NULL,
+	.get_engine_clock = &radeon_atom_get_engine_clock,
+	.set_engine_clock = &radeon_atom_set_engine_clock,
+	.get_memory_clock = &radeon_atom_get_memory_clock,
+	.set_memory_clock = &radeon_atom_set_memory_clock,
+	.set_pcie_lanes = NULL,
+	.set_clock_gating = NULL,
+	.set_surface_reg = r600_set_surface_reg,
+	.clear_surface_reg = r600_clear_surface_reg,
+	.bandwidth_update = &evergreen_bandwidth_update,
+	.hpd_init = &evergreen_hpd_init,
+	.hpd_fini = &evergreen_hpd_fini,
+	.hpd_sense = &evergreen_hpd_sense,
+	.hpd_set_polarity = &evergreen_hpd_set_polarity,
+};
+
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 4d88315..93783b1 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -159,8 +159,15 @@
 							    struct radeon_gpio_rec *gpio)
 {
 	struct radeon_hpd hpd;
+	u32 reg;
+
+	if (ASIC_IS_DCE4(rdev))
+		reg = EVERGREEN_DC_GPIO_HPD_A;
+	else
+		reg = AVIVO_DC_GPIO_HPD_A;
+
 	hpd.gpio = *gpio;
-	if (gpio->reg == AVIVO_DC_GPIO_HPD_A) {
+	if (gpio->reg == reg) {
 		switch(gpio->mask) {
 		case (1 << 0):
 			hpd.hpd = RADEON_HPD_1;
@@ -574,6 +581,9 @@
 				ddc_bus.valid = false;
 			}
 
+			/* needed for aux chan transactions */
+			ddc_bus.hpd_id = hpd.hpd ? (hpd.hpd - 1) : 0;
+
 			conn_id = le16_to_cpu(path->usConnObjectId);
 
 			if (!radeon_atom_apply_quirks
@@ -838,6 +848,7 @@
 	ATOM_FIRMWARE_INFO_V1_2 info_12;
 	ATOM_FIRMWARE_INFO_V1_3 info_13;
 	ATOM_FIRMWARE_INFO_V1_4 info_14;
+	ATOM_FIRMWARE_INFO_V2_1 info_21;
 };
 
 bool radeon_atom_get_clock_info(struct drm_device *dev)
@@ -849,6 +860,7 @@
 	uint8_t frev, crev;
 	struct radeon_pll *p1pll = &rdev->clock.p1pll;
 	struct radeon_pll *p2pll = &rdev->clock.p2pll;
+	struct radeon_pll *dcpll = &rdev->clock.dcpll;
 	struct radeon_pll *spll = &rdev->clock.spll;
 	struct radeon_pll *mpll = &rdev->clock.mpll;
 	uint16_t data_offset;
@@ -951,8 +963,19 @@
 		rdev->clock.default_mclk =
 		    le32_to_cpu(firmware_info->info.ulDefaultMemoryClock);
 
+		if (ASIC_IS_DCE4(rdev)) {
+			rdev->clock.default_dispclk =
+				le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
+			if (rdev->clock.default_dispclk == 0)
+				rdev->clock.default_dispclk = 60000; /* 600 Mhz */
+			rdev->clock.dp_extclk =
+				le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
+		}
+		*dcpll = *p1pll;
+
 		return true;
 	}
+
 	return false;
 }
 
@@ -1091,6 +1114,30 @@
 	return ss;
 }
 
+static void radeon_atom_apply_lvds_quirks(struct drm_device *dev,
+					  struct radeon_encoder_atom_dig *lvds)
+{
+
+	/* Toshiba A300-1BU laptop panel doesn't like new pll divider algo */
+	if ((dev->pdev->device == 0x95c4) &&
+	    (dev->pdev->subsystem_vendor == 0x1179) &&
+	    (dev->pdev->subsystem_device == 0xff50)) {
+		if ((lvds->native_mode.hdisplay == 1280) &&
+		    (lvds->native_mode.vdisplay == 800))
+			lvds->pll_algo = PLL_ALGO_LEGACY;
+	}
+
+	/* Dell Studio 15 laptop panel doesn't like new pll divider algo */
+	if ((dev->pdev->device == 0x95c4) &&
+	    (dev->pdev->subsystem_vendor == 0x1028) &&
+	    (dev->pdev->subsystem_device == 0x029f)) {
+		if ((lvds->native_mode.hdisplay == 1280) &&
+		    (lvds->native_mode.vdisplay == 800))
+			lvds->pll_algo = PLL_ALGO_LEGACY;
+	}
+
+}
+
 union lvds_info {
 	struct _ATOM_LVDS_INFO info;
 	struct _ATOM_LVDS_INFO_V12 info_12;
@@ -1161,6 +1208,21 @@
 
 		lvds->ss = radeon_atombios_get_ss_info(encoder, lvds_info->info.ucSS_Id);
 
+		if (ASIC_IS_AVIVO(rdev)) {
+			if (radeon_new_pll == 0)
+				lvds->pll_algo = PLL_ALGO_LEGACY;
+			else
+				lvds->pll_algo = PLL_ALGO_NEW;
+		} else {
+			if (radeon_new_pll == 1)
+				lvds->pll_algo = PLL_ALGO_NEW;
+			else
+				lvds->pll_algo = PLL_ALGO_LEGACY;
+		}
+
+		/* LVDS quirks */
+		radeon_atom_apply_lvds_quirks(dev, lvds);
+
 		encoder->native_mode = lvds->native_mode;
 	}
 	return lvds;
@@ -1385,6 +1447,371 @@
 	return tv_dac;
 }
 
+union power_info {
+	struct _ATOM_POWERPLAY_INFO info;
+	struct _ATOM_POWERPLAY_INFO_V2 info_2;
+	struct _ATOM_POWERPLAY_INFO_V3 info_3;
+	struct _ATOM_PPLIB_POWERPLAYTABLE info_4;
+};
+
+void radeon_atombios_get_power_modes(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
+	u16 data_offset;
+	u8 frev, crev;
+	u32 misc, misc2 = 0, sclk, mclk;
+	union power_info *power_info;
+	struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info;
+	struct _ATOM_PPLIB_STATE *power_state;
+	int num_modes = 0, i, j;
+	int state_index = 0, mode_index = 0;
+
+	atom_parse_data_header(mode_info->atom_context, index, NULL, &frev, &crev, &data_offset);
+
+	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
+
+	rdev->pm.default_power_state = NULL;
+
+	if (power_info) {
+		if (frev < 4) {
+			num_modes = power_info->info.ucNumOfPowerModeEntries;
+			if (num_modes > ATOM_MAX_NUMBEROF_POWER_BLOCK)
+				num_modes = ATOM_MAX_NUMBEROF_POWER_BLOCK;
+			for (i = 0; i < num_modes; i++) {
+				rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
+				switch (frev) {
+				case 1:
+					rdev->pm.power_state[state_index].num_clock_modes = 1;
+					rdev->pm.power_state[state_index].clock_info[0].mclk =
+						le16_to_cpu(power_info->info.asPowerPlayInfo[i].usMemoryClock);
+					rdev->pm.power_state[state_index].clock_info[0].sclk =
+						le16_to_cpu(power_info->info.asPowerPlayInfo[i].usEngineClock);
+					/* skip invalid modes */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk == 0) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk == 0))
+						continue;
+					/* skip overclock modes for now */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk >
+					     rdev->clock.default_mclk + RADEON_MODE_OVERCLOCK_MARGIN) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk >
+					     rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN))
+						continue;
+					rdev->pm.power_state[state_index].non_clock_info.pcie_lanes =
+						power_info->info.asPowerPlayInfo[i].ucNumPciELanes;
+					misc = le32_to_cpu(power_info->info.asPowerPlayInfo[i].ulMiscInfo);
+					if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_GPIO;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
+							radeon_lookup_gpio(rdev,
+							power_info->info.asPowerPlayInfo[i].ucVoltageDropIndex);
+						if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								true;
+						else
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								false;
+					} else if (misc & ATOM_PM_MISCINFO_PROGRAM_VOLTAGE) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_VDDC;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.vddc_id =
+							power_info->info.asPowerPlayInfo[i].ucVoltageDropIndex;
+					}
+					/* order matters! */
+					if (misc & ATOM_PM_MISCINFO_POWER_SAVING_MODE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_POWERSAVE;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_LOW_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_LOAD_BALANCE_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+					if (misc & ATOM_PM_MISCINFO_3D_ACCELERATION_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_PERFORMANCE;
+					if (misc & ATOM_PM_MISCINFO_DRIVER_DEFAULT_MODE) {
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_DEFAULT;
+						rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+						rdev->pm.power_state[state_index].default_clock_mode =
+							&rdev->pm.power_state[state_index].clock_info[0];
+					}
+					state_index++;
+					break;
+				case 2:
+					rdev->pm.power_state[state_index].num_clock_modes = 1;
+					rdev->pm.power_state[state_index].clock_info[0].mclk =
+						le32_to_cpu(power_info->info_2.asPowerPlayInfo[i].ulMemoryClock);
+					rdev->pm.power_state[state_index].clock_info[0].sclk =
+						le32_to_cpu(power_info->info_2.asPowerPlayInfo[i].ulEngineClock);
+					/* skip invalid modes */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk == 0) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk == 0))
+						continue;
+					/* skip overclock modes for now */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk >
+					     rdev->clock.default_mclk + RADEON_MODE_OVERCLOCK_MARGIN) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk >
+					     rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN))
+						continue;
+					rdev->pm.power_state[state_index].non_clock_info.pcie_lanes =
+						power_info->info_2.asPowerPlayInfo[i].ucNumPciELanes;
+					misc = le32_to_cpu(power_info->info_2.asPowerPlayInfo[i].ulMiscInfo);
+					misc2 = le32_to_cpu(power_info->info_2.asPowerPlayInfo[i].ulMiscInfo2);
+					if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_GPIO;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
+							radeon_lookup_gpio(rdev,
+							power_info->info_2.asPowerPlayInfo[i].ucVoltageDropIndex);
+						if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								true;
+						else
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								false;
+					} else if (misc & ATOM_PM_MISCINFO_PROGRAM_VOLTAGE) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_VDDC;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.vddc_id =
+							power_info->info_2.asPowerPlayInfo[i].ucVoltageDropIndex;
+					}
+					/* order matters! */
+					if (misc & ATOM_PM_MISCINFO_POWER_SAVING_MODE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_POWERSAVE;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_LOW_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_LOAD_BALANCE_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+					if (misc & ATOM_PM_MISCINFO_3D_ACCELERATION_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_PERFORMANCE;
+					if (misc2 & ATOM_PM_MISCINFO2_SYSTEM_AC_LITE_MODE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+					if (misc & ATOM_PM_MISCINFO_DRIVER_DEFAULT_MODE) {
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_DEFAULT;
+						rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+						rdev->pm.power_state[state_index].default_clock_mode =
+							&rdev->pm.power_state[state_index].clock_info[0];
+					}
+					state_index++;
+					break;
+				case 3:
+					rdev->pm.power_state[state_index].num_clock_modes = 1;
+					rdev->pm.power_state[state_index].clock_info[0].mclk =
+						le32_to_cpu(power_info->info_3.asPowerPlayInfo[i].ulMemoryClock);
+					rdev->pm.power_state[state_index].clock_info[0].sclk =
+						le32_to_cpu(power_info->info_3.asPowerPlayInfo[i].ulEngineClock);
+					/* skip invalid modes */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk == 0) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk == 0))
+						continue;
+					/* skip overclock modes for now */
+					if ((rdev->pm.power_state[state_index].clock_info[0].mclk >
+					     rdev->clock.default_mclk + RADEON_MODE_OVERCLOCK_MARGIN) ||
+					    (rdev->pm.power_state[state_index].clock_info[0].sclk >
+					     rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN))
+						continue;
+					rdev->pm.power_state[state_index].non_clock_info.pcie_lanes =
+						power_info->info_3.asPowerPlayInfo[i].ucNumPciELanes;
+					misc = le32_to_cpu(power_info->info_3.asPowerPlayInfo[i].ulMiscInfo);
+					misc2 = le32_to_cpu(power_info->info_3.asPowerPlayInfo[i].ulMiscInfo2);
+					if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_GPIO;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio =
+							radeon_lookup_gpio(rdev,
+							power_info->info_3.asPowerPlayInfo[i].ucVoltageDropIndex);
+						if (misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_ACTIVE_HIGH)
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								true;
+						else
+							rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+								false;
+					} else if (misc & ATOM_PM_MISCINFO_PROGRAM_VOLTAGE) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.type =
+							VOLTAGE_VDDC;
+						rdev->pm.power_state[state_index].clock_info[0].voltage.vddc_id =
+							power_info->info_3.asPowerPlayInfo[i].ucVoltageDropIndex;
+						if (misc2 & ATOM_PM_MISCINFO2_VDDCI_DYNAMIC_VOLTAGE_EN) {
+							rdev->pm.power_state[state_index].clock_info[0].voltage.vddci_enabled =
+								true;
+							rdev->pm.power_state[state_index].clock_info[0].voltage.vddci_id =
+							power_info->info_3.asPowerPlayInfo[i].ucVDDCI_VoltageDropIndex;
+						}
+					}
+					/* order matters! */
+					if (misc & ATOM_PM_MISCINFO_POWER_SAVING_MODE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_POWERSAVE;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_DEFAULT_LOW_DC_STATE_ENTRY_TRUE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+					if (misc & ATOM_PM_MISCINFO_LOAD_BALANCE_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+					if (misc & ATOM_PM_MISCINFO_3D_ACCELERATION_EN)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_PERFORMANCE;
+					if (misc2 & ATOM_PM_MISCINFO2_SYSTEM_AC_LITE_MODE)
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+					if (misc & ATOM_PM_MISCINFO_DRIVER_DEFAULT_MODE) {
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_DEFAULT;
+						rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+						rdev->pm.power_state[state_index].default_clock_mode =
+							&rdev->pm.power_state[state_index].clock_info[0];
+					}
+					state_index++;
+					break;
+				}
+			}
+		} else if (frev == 4) {
+			for (i = 0; i < power_info->info_4.ucNumStates; i++) {
+				mode_index = 0;
+				power_state = (struct _ATOM_PPLIB_STATE *)
+					(mode_info->atom_context->bios +
+					 data_offset +
+					 le16_to_cpu(power_info->info_4.usStateArrayOffset) +
+					 i * power_info->info_4.ucStateEntrySize);
+				non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
+					(mode_info->atom_context->bios +
+					 data_offset +
+					 le16_to_cpu(power_info->info_4.usNonClockInfoArrayOffset) +
+					 (power_state->ucNonClockStateIndex *
+					  power_info->info_4.ucNonClockSize));
+				for (j = 0; j < (power_info->info_4.ucStateEntrySize - 1); j++) {
+					if (rdev->flags & RADEON_IS_IGP) {
+						struct _ATOM_PPLIB_RS780_CLOCK_INFO *clock_info =
+							(struct _ATOM_PPLIB_RS780_CLOCK_INFO *)
+							(mode_info->atom_context->bios +
+							 data_offset +
+							 le16_to_cpu(power_info->info_4.usClockInfoArrayOffset) +
+							 (power_state->ucClockStateIndices[j] *
+							  power_info->info_4.ucClockInfoSize));
+						sclk = le16_to_cpu(clock_info->usLowEngineClockLow);
+						sclk |= clock_info->ucLowEngineClockHigh << 16;
+						rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
+						/* skip invalid modes */
+						if (rdev->pm.power_state[state_index].clock_info[mode_index].sclk == 0)
+							continue;
+						/* skip overclock modes for now */
+						if (rdev->pm.power_state[state_index].clock_info[mode_index].sclk >
+						    rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN)
+							continue;
+						rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type =
+							VOLTAGE_SW;
+						rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage =
+							clock_info->usVDDC;
+						mode_index++;
+					} else {
+						struct _ATOM_PPLIB_R600_CLOCK_INFO *clock_info =
+							(struct _ATOM_PPLIB_R600_CLOCK_INFO *)
+							(mode_info->atom_context->bios +
+							 data_offset +
+							 le16_to_cpu(power_info->info_4.usClockInfoArrayOffset) +
+							 (power_state->ucClockStateIndices[j] *
+							  power_info->info_4.ucClockInfoSize));
+						sclk = le16_to_cpu(clock_info->usEngineClockLow);
+						sclk |= clock_info->ucEngineClockHigh << 16;
+						mclk = le16_to_cpu(clock_info->usMemoryClockLow);
+						mclk |= clock_info->ucMemoryClockHigh << 16;
+						rdev->pm.power_state[state_index].clock_info[mode_index].mclk = mclk;
+						rdev->pm.power_state[state_index].clock_info[mode_index].sclk = sclk;
+						/* skip invalid modes */
+						if ((rdev->pm.power_state[state_index].clock_info[mode_index].mclk == 0) ||
+						    (rdev->pm.power_state[state_index].clock_info[mode_index].sclk == 0))
+							continue;
+						/* skip overclock modes for now */
+						if ((rdev->pm.power_state[state_index].clock_info[mode_index].mclk >
+						     rdev->clock.default_mclk + RADEON_MODE_OVERCLOCK_MARGIN) ||
+						    (rdev->pm.power_state[state_index].clock_info[mode_index].sclk >
+						     rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN))
+							continue;
+						rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type =
+							VOLTAGE_SW;
+						rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage =
+							clock_info->usVDDC;
+						mode_index++;
+					}
+				}
+				rdev->pm.power_state[state_index].num_clock_modes = mode_index;
+				if (mode_index) {
+					misc = le32_to_cpu(non_clock_info->ulCapsAndSettings);
+					misc2 = le16_to_cpu(non_clock_info->usClassification);
+					rdev->pm.power_state[state_index].non_clock_info.pcie_lanes =
+						((misc & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >>
+						ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
+					switch (misc2 & ATOM_PPLIB_CLASSIFICATION_UI_MASK) {
+					case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY:
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BATTERY;
+						break;
+					case ATOM_PPLIB_CLASSIFICATION_UI_BALANCED:
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_BALANCED;
+						break;
+					case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE:
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_PERFORMANCE;
+						break;
+					}
+					if (misc2 & ATOM_PPLIB_CLASSIFICATION_BOOT) {
+						rdev->pm.power_state[state_index].type =
+							POWER_STATE_TYPE_DEFAULT;
+						rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+						rdev->pm.power_state[state_index].default_clock_mode =
+							&rdev->pm.power_state[state_index].clock_info[mode_index - 1];
+					}
+					state_index++;
+				}
+			}
+		}
+	} else {
+		/* XXX figure out some good default low power mode for cards w/out power tables */
+	}
+
+	if (rdev->pm.default_power_state == NULL) {
+		/* add the default mode */
+		rdev->pm.power_state[state_index].type =
+			POWER_STATE_TYPE_DEFAULT;
+		rdev->pm.power_state[state_index].num_clock_modes = 1;
+		rdev->pm.power_state[state_index].clock_info[0].mclk = rdev->clock.default_mclk;
+		rdev->pm.power_state[state_index].clock_info[0].sclk = rdev->clock.default_sclk;
+		rdev->pm.power_state[state_index].default_clock_mode =
+			&rdev->pm.power_state[state_index].clock_info[0];
+		rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
+		if (rdev->asic->get_pcie_lanes)
+			rdev->pm.power_state[state_index].non_clock_info.pcie_lanes = radeon_get_pcie_lanes(rdev);
+		else
+			rdev->pm.power_state[state_index].non_clock_info.pcie_lanes = 16;
+		rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+		state_index++;
+	}
+	rdev->pm.num_power_states = state_index;
+
+	rdev->pm.current_power_state = rdev->pm.default_power_state;
+	rdev->pm.current_clock_mode =
+		rdev->pm.default_power_state->default_clock_mode;
+}
+
 void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
 {
 	DYNAMIC_CLOCK_GATING_PS_ALLOCATION args;
@@ -1395,16 +1822,6 @@
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
-void radeon_atom_static_pwrmgt_setup(struct radeon_device *rdev, int enable)
-{
-	ENABLE_ASIC_STATIC_PWR_MGT_PS_ALLOCATION args;
-	int index = GetIndexIntoMasterTable(COMMAND, EnableASIC_StaticPwrMgt);
-
-	args.ucEnable = enable;
-
-	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
-}
-
 uint32_t radeon_atom_get_engine_clock(struct radeon_device *rdev)
 {
 	GET_ENGINE_CLOCK_PS_ALLOCATION args;
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
new file mode 100644
index 0000000..3f557c4
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * Licensed under GPLv2
+ *
+ * ATPX support for both Intel/ATI
+ */
+#include <linux/vga_switcheroo.h>
+#include <acpi/acpi.h>
+#include <acpi/acpi_bus.h>
+#include <linux/pci.h>
+
+#define ATPX_VERSION 0
+#define ATPX_GPU_PWR 2
+#define ATPX_MUX_SELECT 3
+
+#define ATPX_INTEGRATED 0
+#define ATPX_DISCRETE 1
+
+#define ATPX_MUX_IGD 0
+#define ATPX_MUX_DISCRETE 1
+
+static struct radeon_atpx_priv {
+	bool atpx_detected;
+	/* handle for device - and atpx */
+	acpi_handle dhandle;
+	acpi_handle atpx_handle;
+	acpi_handle atrm_handle;
+} radeon_atpx_priv;
+
+/* retrieve the ROM in 4k blocks */
+static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
+			    int offset, int len)
+{
+	acpi_status status;
+	union acpi_object atrm_arg_elements[2], *obj;
+	struct acpi_object_list atrm_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL};
+
+	atrm_arg.count = 2;
+	atrm_arg.pointer = &atrm_arg_elements[0];
+
+	atrm_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atrm_arg_elements[0].integer.value = offset;
+
+	atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
+	atrm_arg_elements[1].integer.value = len;
+
+	status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
+		return -ENODEV;
+	}
+
+	obj = (union acpi_object *)buffer.pointer;
+	memcpy(bios+offset, obj->buffer.pointer, len);
+	kfree(buffer.pointer);
+	return len;
+}
+
+bool radeon_atrm_supported(struct pci_dev *pdev)
+{
+	/* get the discrete ROM only via ATRM */
+	if (!radeon_atpx_priv.atpx_detected)
+		return false;
+
+	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return false;
+	return true;
+}
+
+
+int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len)
+{
+	return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len);
+}
+
+static int radeon_atpx_get_version(acpi_handle handle)
+{
+	acpi_status status;
+	union acpi_object atpx_arg_elements[2], *obj;
+	struct acpi_object_list atpx_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+
+	atpx_arg.count = 2;
+	atpx_arg.pointer = &atpx_arg_elements[0];
+
+	atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[0].integer.value = ATPX_VERSION;
+
+	atpx_arg_elements[1].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[1].integer.value = ATPX_VERSION;
+
+	status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("%s: failed to call ATPX: %s\n", __func__, acpi_format_exception(status));
+		return -ENOSYS;
+	}
+	obj = (union acpi_object *)buffer.pointer;
+	if (obj && (obj->type == ACPI_TYPE_BUFFER))
+		printk(KERN_INFO "radeon atpx: version is %d\n", *((u8 *)(obj->buffer.pointer) + 2));
+	kfree(buffer.pointer);
+	return 0;
+}
+
+static int radeon_atpx_execute(acpi_handle handle, int cmd_id, u16 value)
+{
+	acpi_status status;
+	union acpi_object atpx_arg_elements[2];
+	struct acpi_object_list atpx_arg;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	uint8_t buf[4] = {0};
+
+	if (!handle)
+		return -EINVAL;
+
+	atpx_arg.count = 2;
+	atpx_arg.pointer = &atpx_arg_elements[0];
+
+	atpx_arg_elements[0].type = ACPI_TYPE_INTEGER;
+	atpx_arg_elements[0].integer.value = cmd_id;
+
+	buf[2] = value & 0xff;
+	buf[3] = (value >> 8) & 0xff;
+
+	atpx_arg_elements[1].type = ACPI_TYPE_BUFFER;
+	atpx_arg_elements[1].buffer.length = 4;
+	atpx_arg_elements[1].buffer.pointer = buf;
+
+	status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
+	if (ACPI_FAILURE(status)) {
+		printk("%s: failed to call ATPX: %s\n", __func__, acpi_format_exception(status));
+		return -ENOSYS;
+	}
+	kfree(buffer.pointer);
+
+	return 0;
+}
+
+static int radeon_atpx_set_discrete_state(acpi_handle handle, int state)
+{
+	return radeon_atpx_execute(handle, ATPX_GPU_PWR, state);
+}
+
+static int radeon_atpx_switch_mux(acpi_handle handle, int mux_id)
+{
+	return radeon_atpx_execute(handle, ATPX_MUX_SELECT, mux_id);
+}
+
+
+static int radeon_atpx_switchto(enum vga_switcheroo_client_id id)
+{
+	if (id == VGA_SWITCHEROO_IGD)
+		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 0);
+	else
+		radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 1);
+	return 0;
+}
+
+static int radeon_atpx_power_state(enum vga_switcheroo_client_id id,
+				   enum vga_switcheroo_state state)
+{
+	/* on w500 ACPI can't change intel gpu state */
+	if (id == VGA_SWITCHEROO_IGD)
+		return 0;
+
+	radeon_atpx_set_discrete_state(radeon_atpx_priv.atpx_handle, state);
+	return 0;
+}
+
+static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
+{
+	acpi_handle dhandle, atpx_handle, atrm_handle;
+	acpi_status status;
+
+	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	if (!dhandle)
+		return false;
+
+	status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	status = acpi_get_handle(dhandle, "ATRM", &atrm_handle);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	radeon_atpx_priv.dhandle = dhandle;
+	radeon_atpx_priv.atpx_handle = atpx_handle;
+	radeon_atpx_priv.atrm_handle = atrm_handle;
+	return true;
+}
+
+static int radeon_atpx_init(void)
+{
+	/* set up the ATPX handle */
+
+	radeon_atpx_get_version(radeon_atpx_priv.atpx_handle);
+	return 0;
+}
+
+static int radeon_atpx_get_client_id(struct pci_dev *pdev)
+{
+	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+		return VGA_SWITCHEROO_IGD;
+	else
+		return VGA_SWITCHEROO_DIS;
+}
+
+static struct vga_switcheroo_handler radeon_atpx_handler = {
+	.switchto = radeon_atpx_switchto,
+	.power_state = radeon_atpx_power_state,
+	.init = radeon_atpx_init,
+	.get_client_id = radeon_atpx_get_client_id,
+};
+
+static bool radeon_atpx_detect(void)
+{
+	char acpi_method_name[255] = { 0 };
+	struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
+	struct pci_dev *pdev = NULL;
+	bool has_atpx = false;
+	int vga_count = 0;
+
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+		vga_count++;
+
+		has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true);
+	}
+
+	if (has_atpx && vga_count == 2) {
+		acpi_get_name(radeon_atpx_priv.atpx_handle, ACPI_FULL_PATHNAME, &buffer);
+		printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n",
+		       acpi_method_name);
+		radeon_atpx_priv.atpx_detected = true;
+		return true;
+	}
+	return false;
+}
+
+void radeon_register_atpx_handler(void)
+{
+	bool r;
+
+	/* detect if we have any ATPX + 2 VGA in the system */
+	r = radeon_atpx_detect();
+	if (!r)
+		return;
+
+	vga_switcheroo_register_handler(&radeon_atpx_handler);
+}
+
+void radeon_unregister_atpx_handler(void)
+{
+	vga_switcheroo_unregister_handler();
+}
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 9069217..5572404 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -30,6 +30,7 @@
 #include "radeon.h"
 #include "atom.h"
 
+#include <linux/vga_switcheroo.h>
 /*
  * BIOS.
  */
@@ -62,7 +63,7 @@
 		iounmap(bios);
 		return false;
 	}
-	memcpy(rdev->bios, bios, size);
+	memcpy_fromio(rdev->bios, bios, size);
 	iounmap(bios);
 	return true;
 }
@@ -93,6 +94,38 @@
 	return true;
 }
 
+/* ATRM is used to get the BIOS on the discrete cards in
+ * dual-gpu systems.
+ */
+static bool radeon_atrm_get_bios(struct radeon_device *rdev)
+{
+	int ret;
+	int size = 64 * 1024;
+	int i;
+
+	if (!radeon_atrm_supported(rdev->pdev))
+		return false;
+
+	rdev->bios = kmalloc(size, GFP_KERNEL);
+	if (!rdev->bios) {
+		DRM_ERROR("Unable to allocate bios\n");
+		return false;
+	}
+
+	for (i = 0; i < size / ATRM_BIOS_PAGE; i++) {
+		ret = radeon_atrm_get_bios_chunk(rdev->bios,
+						 (i * ATRM_BIOS_PAGE),
+						 ATRM_BIOS_PAGE);
+		if (ret <= 0)
+			break;
+	}
+
+	if (i == 0 || rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) {
+		kfree(rdev->bios);
+		return false;
+	}
+	return true;
+}
 static bool r700_read_disabled_bios(struct radeon_device *rdev)
 {
 	uint32_t viph_control;
@@ -388,16 +421,16 @@
 		return legacy_read_disabled_bios(rdev);
 }
 
+
 bool radeon_get_bios(struct radeon_device *rdev)
 {
 	bool r;
 	uint16_t tmp;
 
-	if (rdev->flags & RADEON_IS_IGP) {
+	r = radeon_atrm_get_bios(rdev);
+	if (r == false)
 		r = igp_read_bios_from_vram(rdev);
-		if (r == false)
-			r = radeon_read_bios(rdev);
-	} else
+	if (r == false)
 		r = radeon_read_bios(rdev);
 	if (r == false) {
 		r = radeon_read_disabled_bios(rdev);
@@ -408,6 +441,13 @@
 		return false;
 	}
 	if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) {
+		printk("BIOS signature incorrect %x %x\n", rdev->bios[0], rdev->bios[1]);
+		goto free_bios;
+	}
+
+	tmp = RBIOS16(0x18);
+	if (RBIOS8(tmp + 0x14) != 0x0) {
+		DRM_INFO("Not an x86 BIOS ROM, not using.\n");
 		goto free_bios;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
index 73c4405..f64936c 100644
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -96,6 +96,7 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_pll *p1pll = &rdev->clock.p1pll;
 	struct radeon_pll *p2pll = &rdev->clock.p2pll;
+	struct radeon_pll *dcpll = &rdev->clock.dcpll;
 	struct radeon_pll *spll = &rdev->clock.spll;
 	struct radeon_pll *mpll = &rdev->clock.mpll;
 	int ret;
@@ -204,6 +205,17 @@
 		p2pll->max_frac_feedback_div = 0;
 	}
 
+	/* dcpll is DCE4 only */
+	dcpll->min_post_div = 2;
+	dcpll->max_post_div = 0x7f;
+	dcpll->min_frac_feedback_div = 0;
+	dcpll->max_frac_feedback_div = 9;
+	dcpll->min_ref_div = 2;
+	dcpll->max_ref_div = 0x3ff;
+	dcpll->min_feedback_div = 4;
+	dcpll->max_feedback_div = 0xfff;
+	dcpll->best_vco = 0;
+
 	p1pll->min_ref_div = 2;
 	p1pll->max_ref_div = 0x3ff;
 	p1pll->min_feedback_div = 4;
@@ -846,8 +858,10 @@
 	/* XXX make sure engine is idle */
 
 	if (radeon_dynclks != -1) {
-		if (radeon_dynclks)
-			radeon_set_clock_gating(rdev, 1);
+		if (radeon_dynclks) {
+			if (rdev->asic->set_clock_gating)
+				radeon_set_clock_gating(rdev, 1);
+		}
 	}
 	radeon_apply_clock_quirks(rdev);
 	return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 22d4761..e9ea38e 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -150,6 +150,9 @@
 	int rev;
 	uint16_t offset = 0, check_offset;
 
+	if (!rdev->bios)
+		return 0;
+
 	switch (table) {
 		/* absolute offset tables */
 	case COMBIOS_ASIC_INIT_1_TABLE:
@@ -443,6 +446,39 @@
 
 }
 
+bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
+{
+	int edid_info;
+	struct edid *edid;
+	edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE);
+	if (!edid_info)
+		return false;
+
+	edid = kmalloc(EDID_LENGTH * (DRM_MAX_EDID_EXT_NUM + 1),
+		       GFP_KERNEL);
+	if (edid == NULL)
+		return false;
+
+	memcpy((unsigned char *)edid,
+	       (unsigned char *)(rdev->bios + edid_info), EDID_LENGTH);
+
+	if (!drm_edid_is_valid(edid)) {
+		kfree(edid);
+		return false;
+	}
+
+	rdev->mode_info.bios_hardcoded_edid = edid;
+	return true;
+}
+
+struct edid *
+radeon_combios_get_hardcoded_edid(struct radeon_device *rdev)
+{
+	if (rdev->mode_info.bios_hardcoded_edid)
+		return rdev->mode_info.bios_hardcoded_edid;
+	return NULL;
+}
+
 static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev,
 						       int ddc_line)
 {
@@ -486,9 +522,65 @@
 		i2c.y_data_reg = ddc_line;
 	}
 
-	if (rdev->family < CHIP_R200)
-		i2c.hw_capable = false;
-	else {
+	switch (rdev->family) {
+	case CHIP_R100:
+	case CHIP_RV100:
+	case CHIP_RS100:
+	case CHIP_RV200:
+	case CHIP_RS200:
+	case CHIP_RS300:
+		switch (ddc_line) {
+		case RADEON_GPIO_DVI_DDC:
+			/* in theory this should be hw capable,
+			 * but it doesn't seem to work
+			 */
+			i2c.hw_capable = false;
+			break;
+		default:
+			i2c.hw_capable = false;
+			break;
+		}
+		break;
+	case CHIP_R200:
+		switch (ddc_line) {
+		case RADEON_GPIO_DVI_DDC:
+		case RADEON_GPIO_MONID:
+			i2c.hw_capable = true;
+			break;
+		default:
+			i2c.hw_capable = false;
+			break;
+		}
+		break;
+	case CHIP_RV250:
+	case CHIP_RV280:
+		switch (ddc_line) {
+		case RADEON_GPIO_VGA_DDC:
+		case RADEON_GPIO_DVI_DDC:
+		case RADEON_GPIO_CRT2_DDC:
+			i2c.hw_capable = true;
+			break;
+		default:
+			i2c.hw_capable = false;
+			break;
+		}
+		break;
+	case CHIP_R300:
+	case CHIP_R350:
+		switch (ddc_line) {
+		case RADEON_GPIO_VGA_DDC:
+		case RADEON_GPIO_DVI_DDC:
+			i2c.hw_capable = true;
+			break;
+		default:
+			i2c.hw_capable = false;
+			break;
+		}
+		break;
+	case CHIP_RV350:
+	case CHIP_RV380:
+	case CHIP_RS400:
+	case CHIP_RS480:
 		switch (ddc_line) {
 		case RADEON_GPIO_VGA_DDC:
 		case RADEON_GPIO_DVI_DDC:
@@ -504,9 +596,14 @@
 			i2c.hw_capable = false;
 			break;
 		}
+		break;
+	default:
+		i2c.hw_capable = false;
+		break;
 	}
 	i2c.mm_i2c = false;
 	i2c.i2c_id = 0;
+	i2c.hpd_id = 0;
 
 	if (ddc_line)
 		i2c.valid = true;
@@ -527,9 +624,6 @@
 	int8_t rev;
 	uint16_t sclk, mclk;
 
-	if (rdev->bios == NULL)
-		return false;
-
 	pll_info = combios_get_table_offset(dev, COMBIOS_PLL_INFO_TABLE);
 	if (pll_info) {
 		rev = RBIOS8(pll_info);
@@ -654,9 +748,6 @@
 	if (!p_dac)
 		return NULL;
 
-	if (rdev->bios == NULL)
-		goto out;
-
 	/* check CRT table */
 	dac_info = combios_get_table_offset(dev, COMBIOS_CRT_INFO_TABLE);
 	if (dac_info) {
@@ -673,7 +764,6 @@
 		found = 1;
 	}
 
-out:
 	if (!found) /* fallback to defaults */
 		radeon_legacy_get_primary_dac_info_from_table(rdev, p_dac);
 
@@ -687,9 +777,6 @@
 	uint16_t tv_info;
 	enum radeon_tv_std tv_std = TV_STD_NTSC;
 
-	if (rdev->bios == NULL)
-		return tv_std;
-
 	tv_info = combios_get_table_offset(dev, COMBIOS_TV_INFO_TABLE);
 	if (tv_info) {
 		if (RBIOS8(tv_info + 6) == 'T') {
@@ -793,9 +880,6 @@
 	if (!tv_dac)
 		return NULL;
 
-	if (rdev->bios == NULL)
-		goto out;
-
 	/* first check TV table */
 	dac_info = combios_get_table_offset(dev, COMBIOS_TV_INFO_TABLE);
 	if (dac_info) {
@@ -857,7 +941,6 @@
 		}
 	}
 
-out:
 	if (!found) /* fallback to defaults */
 		radeon_legacy_get_tv_dac_info_from_table(rdev, tv_dac);
 
@@ -945,11 +1028,6 @@
 	int tmp, i;
 	struct radeon_encoder_lvds *lvds = NULL;
 
-	if (rdev->bios == NULL) {
-		lvds = radeon_legacy_get_lvds_info_from_regs(rdev);
-		goto out;
-	}
-
 	lcd_info = combios_get_table_offset(dev, COMBIOS_LCD_INFO_TABLE);
 
 	if (lcd_info) {
@@ -1050,7 +1128,7 @@
 		DRM_INFO("No panel info found in BIOS\n");
 		lvds = radeon_legacy_get_lvds_info_from_regs(rdev);
 	}
-out:
+
 	if (lvds)
 		encoder->native_mode = lvds->native_mode;
 	return lvds;
@@ -1102,9 +1180,6 @@
 	int i, n;
 	uint8_t ver;
 
-	if (rdev->bios == NULL)
-		return false;
-
 	tmds_info = combios_get_table_offset(dev, COMBIOS_DFP_INFO_TABLE);
 
 	if (tmds_info) {
@@ -1184,9 +1259,6 @@
 	enum radeon_combios_ddc gpio;
 	struct radeon_i2c_bus_rec i2c_bus;
 
-	if (rdev->bios == NULL)
-		return false;
-
 	tmds->i2c_bus = NULL;
 	if (rdev->flags & RADEON_IS_IGP) {
 		offset = combios_get_table_offset(dev, COMBIOS_I2C_INFO_TABLE);
@@ -1253,7 +1325,10 @@
 				tmds->i2c_bus = radeon_i2c_create(dev, &i2c_bus, "DVO");
 				break;
 			case DDC_LCD: /* MM i2c */
-				DRM_ERROR("MM i2c requires hw i2c engine\n");
+				i2c_bus.valid = true;
+				i2c_bus.hw_capable = true;
+				i2c_bus.mm_i2c = true;
+				tmds->i2c_bus = radeon_i2c_create(dev, &i2c_bus, "DVO");
 				break;
 			default:
 				DRM_ERROR("Unsupported gpio %d\n", gpio);
@@ -1909,9 +1984,6 @@
 	struct radeon_i2c_bus_rec ddc_i2c;
 	struct radeon_hpd hpd;
 
-	if (rdev->bios == NULL)
-		return false;
-
 	conn_info = combios_get_table_offset(dev, COMBIOS_CONNECTOR_INFO_TABLE);
 	if (conn_info) {
 		for (i = 0; i < 4; i++) {
@@ -2278,6 +2350,115 @@
 	return true;
 }
 
+void radeon_combios_get_power_modes(struct radeon_device *rdev)
+{
+	struct drm_device *dev = rdev->ddev;
+	u16 offset, misc, misc2 = 0;
+	u8 rev, blocks, tmp;
+	int state_index = 0;
+
+	rdev->pm.default_power_state = NULL;
+
+	if (rdev->flags & RADEON_IS_MOBILITY) {
+		offset = combios_get_table_offset(dev, COMBIOS_POWERPLAY_INFO_TABLE);
+		if (offset) {
+			rev = RBIOS8(offset);
+			blocks = RBIOS8(offset + 0x2);
+			/* power mode 0 tends to be the only valid one */
+			rdev->pm.power_state[state_index].num_clock_modes = 1;
+			rdev->pm.power_state[state_index].clock_info[0].mclk = RBIOS32(offset + 0x5 + 0x2);
+			rdev->pm.power_state[state_index].clock_info[0].sclk = RBIOS32(offset + 0x5 + 0x6);
+			if ((rdev->pm.power_state[state_index].clock_info[0].mclk == 0) ||
+			    (rdev->pm.power_state[state_index].clock_info[0].sclk == 0))
+				goto default_mode;
+			/* skip overclock modes for now */
+			if ((rdev->pm.power_state[state_index].clock_info[0].mclk >
+			     rdev->clock.default_mclk + RADEON_MODE_OVERCLOCK_MARGIN) ||
+			    (rdev->pm.power_state[state_index].clock_info[0].sclk >
+			     rdev->clock.default_sclk + RADEON_MODE_OVERCLOCK_MARGIN))
+				goto default_mode;
+			rdev->pm.power_state[state_index].type =
+				POWER_STATE_TYPE_BATTERY;
+			misc = RBIOS16(offset + 0x5 + 0x0);
+			if (rev > 4)
+				misc2 = RBIOS16(offset + 0x5 + 0xe);
+			if (misc & 0x4) {
+				rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_GPIO;
+				if (misc & 0x8)
+					rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+						true;
+				else
+					rdev->pm.power_state[state_index].clock_info[0].voltage.active_high =
+						false;
+				rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.valid = true;
+				if (rev < 6) {
+					rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.reg =
+						RBIOS16(offset + 0x5 + 0xb) * 4;
+					tmp = RBIOS8(offset + 0x5 + 0xd);
+					rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.mask = (1 << tmp);
+				} else {
+					u8 entries = RBIOS8(offset + 0x5 + 0xb);
+					u16 voltage_table_offset = RBIOS16(offset + 0x5 + 0xc);
+					if (entries && voltage_table_offset) {
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.reg =
+							RBIOS16(voltage_table_offset) * 4;
+						tmp = RBIOS8(voltage_table_offset + 0x2);
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.mask = (1 << tmp);
+					} else
+						rdev->pm.power_state[state_index].clock_info[0].voltage.gpio.valid = false;
+				}
+				switch ((misc2 & 0x700) >> 8) {
+				case 0:
+				default:
+					rdev->pm.power_state[state_index].clock_info[0].voltage.delay = 0;
+					break;
+				case 1:
+					rdev->pm.power_state[state_index].clock_info[0].voltage.delay = 33;
+					break;
+				case 2:
+					rdev->pm.power_state[state_index].clock_info[0].voltage.delay = 66;
+					break;
+				case 3:
+					rdev->pm.power_state[state_index].clock_info[0].voltage.delay = 99;
+					break;
+				case 4:
+					rdev->pm.power_state[state_index].clock_info[0].voltage.delay = 132;
+					break;
+				}
+			} else
+				rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
+			if (rev > 6)
+				rdev->pm.power_state[state_index].non_clock_info.pcie_lanes =
+					RBIOS8(offset + 0x5 + 0x10);
+			state_index++;
+		} else {
+			/* XXX figure out some good default low power mode for mobility cards w/out power tables */
+		}
+	} else {
+		/* XXX figure out some good default low power mode for desktop cards */
+	}
+
+default_mode:
+	/* add the default mode */
+	rdev->pm.power_state[state_index].type =
+		POWER_STATE_TYPE_DEFAULT;
+	rdev->pm.power_state[state_index].num_clock_modes = 1;
+	rdev->pm.power_state[state_index].clock_info[0].mclk = rdev->clock.default_mclk;
+	rdev->pm.power_state[state_index].clock_info[0].sclk = rdev->clock.default_sclk;
+	rdev->pm.power_state[state_index].default_clock_mode = &rdev->pm.power_state[state_index].clock_info[0];
+	rdev->pm.power_state[state_index].clock_info[0].voltage.type = VOLTAGE_NONE;
+	if (rdev->asic->get_pcie_lanes)
+		rdev->pm.power_state[state_index].non_clock_info.pcie_lanes = radeon_get_pcie_lanes(rdev);
+	else
+		rdev->pm.power_state[state_index].non_clock_info.pcie_lanes = 16;
+	rdev->pm.default_power_state = &rdev->pm.power_state[state_index];
+	rdev->pm.num_power_states = state_index + 1;
+
+	rdev->pm.current_power_state = rdev->pm.default_power_state;
+	rdev->pm.current_clock_mode =
+		rdev->pm.default_power_state->default_clock_mode;
+}
+
 void radeon_external_tmds_setup(struct drm_encoder *encoder)
 {
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
@@ -2289,23 +2470,21 @@
 	switch (tmds->dvo_chip) {
 	case DVO_SIL164:
 		/* sil 164 */
-		radeon_i2c_do_lock(tmds->i2c_bus, 1);
-		radeon_i2c_sw_put_byte(tmds->i2c_bus,
-				       tmds->slave_addr,
-				       0x08, 0x30);
-		radeon_i2c_sw_put_byte(tmds->i2c_bus,
+		radeon_i2c_put_byte(tmds->i2c_bus,
+				    tmds->slave_addr,
+				    0x08, 0x30);
+		radeon_i2c_put_byte(tmds->i2c_bus,
 				       tmds->slave_addr,
 				       0x09, 0x00);
-		radeon_i2c_sw_put_byte(tmds->i2c_bus,
-				       tmds->slave_addr,
-				       0x0a, 0x90);
-		radeon_i2c_sw_put_byte(tmds->i2c_bus,
-				       tmds->slave_addr,
-				       0x0c, 0x89);
-		radeon_i2c_sw_put_byte(tmds->i2c_bus,
+		radeon_i2c_put_byte(tmds->i2c_bus,
+				    tmds->slave_addr,
+				    0x0a, 0x90);
+		radeon_i2c_put_byte(tmds->i2c_bus,
+				    tmds->slave_addr,
+				    0x0c, 0x89);
+		radeon_i2c_put_byte(tmds->i2c_bus,
 				       tmds->slave_addr,
 				       0x08, 0x3b);
-		radeon_i2c_do_lock(tmds->i2c_bus, 0);
 		break;
 	case DVO_SIL1178:
 		/* sil 1178 - untested */
@@ -2338,9 +2517,6 @@
 	uint32_t reg, val, and_mask, or_mask;
 	struct radeon_encoder_ext_tmds *tmds = radeon_encoder->enc_priv;
 
-	if (rdev->bios == NULL)
-		return false;
-
 	if (!tmds)
 		return false;
 
@@ -2390,11 +2566,9 @@
 						index++;
 						val = RBIOS8(index);
 						index++;
-						radeon_i2c_do_lock(tmds->i2c_bus, 1);
-						radeon_i2c_sw_put_byte(tmds->i2c_bus,
-								       slave_addr,
-								       reg, val);
-						radeon_i2c_do_lock(tmds->i2c_bus, 0);
+						radeon_i2c_put_byte(tmds->i2c_bus,
+								    slave_addr,
+								    reg, val);
 						break;
 					default:
 						DRM_ERROR("Unknown id %d\n", id >> 13);
@@ -2447,11 +2621,9 @@
 					reg = id & 0x1fff;
 					val = RBIOS8(index);
 					index += 1;
-					radeon_i2c_do_lock(tmds->i2c_bus, 1);
-					radeon_i2c_sw_put_byte(tmds->i2c_bus,
-							       tmds->slave_addr,
-							       reg, val);
-					radeon_i2c_do_lock(tmds->i2c_bus, 0);
+					radeon_i2c_put_byte(tmds->i2c_bus,
+							    tmds->slave_addr,
+							    reg, val);
 					break;
 				default:
 					DRM_ERROR("Unknown id %d\n", id >> 13);
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 65f8194..ee0083f 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -479,10 +479,8 @@
 		ret = connector_status_connected;
 	else {
 		if (radeon_connector->ddc_bus) {
-			radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 			radeon_connector->edid = drm_get_edid(&radeon_connector->base,
 							      &radeon_connector->ddc_bus->adapter);
-			radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 			if (radeon_connector->edid)
 				ret = connector_status_connected;
 		}
@@ -587,19 +585,14 @@
 	if (!encoder)
 		ret = connector_status_disconnected;
 
-	if (radeon_connector->ddc_bus) {
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
+	if (radeon_connector->ddc_bus)
 		dret = radeon_ddc_probe(radeon_connector);
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
-	}
 	if (dret) {
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
 		}
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 		radeon_connector->edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 
 		if (!radeon_connector->edid) {
 			DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
@@ -744,19 +737,14 @@
 	enum drm_connector_status ret = connector_status_disconnected;
 	bool dret = false;
 
-	if (radeon_connector->ddc_bus) {
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
+	if (radeon_connector->ddc_bus)
 		dret = radeon_ddc_probe(radeon_connector);
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
-	}
 	if (dret) {
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
 		}
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 		radeon_connector->edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 
 		if (!radeon_connector->edid) {
 			DRM_ERROR("%s: probed a monitor but no|invalid EDID\n",
@@ -952,7 +940,7 @@
 	if (radeon_connector->edid)
 		kfree(radeon_connector->edid);
 	if (radeon_dig_connector->dp_i2c_bus)
-		radeon_i2c_destroy(radeon_dig_connector->dp_i2c_bus);
+		radeon_i2c_destroy_dp(radeon_dig_connector->dp_i2c_bus);
 	kfree(radeon_connector->con_priv);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
@@ -988,12 +976,10 @@
 			ret = connector_status_connected;
 		}
 	} else {
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 		if (radeon_ddc_probe(radeon_connector)) {
 			radeon_dig_connector->dp_sink_type = sink_type;
 			ret = connector_status_connected;
 		}
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 06123ba..dc6eba6 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -1644,6 +1644,7 @@
 	radeon_cp_load_microcode(dev_priv);
 	radeon_cp_init_ring_buffer(dev, dev_priv, file_priv);
 
+	dev_priv->have_z_offset = 0;
 	radeon_do_engine_reset(dev);
 	radeon_irq_set_state(dev, RADEON_SW_INT_ENABLE, 1);
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index e9d0850..70ba02e 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -194,11 +194,8 @@
 	}
 	radeon_bo_list_unreserve(&parser->validated);
 	for (i = 0; i < parser->nrelocs; i++) {
-		if (parser->relocs[i].gobj) {
-			mutex_lock(&parser->rdev->ddev->struct_mutex);
-			drm_gem_object_unreference(parser->relocs[i].gobj);
-			mutex_unlock(&parser->rdev->ddev->struct_mutex);
-		}
+		if (parser->relocs[i].gobj)
+			drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
 	}
 	kfree(parser->track);
 	kfree(parser->relocs);
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 28772a3..b7023ff 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -36,7 +36,14 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	uint32_t cur_lock;
 
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		cur_lock = RREG32(EVERGREEN_CUR_UPDATE + radeon_crtc->crtc_offset);
+		if (lock)
+			cur_lock |= EVERGREEN_CURSOR_UPDATE_LOCK;
+		else
+			cur_lock &= ~EVERGREEN_CURSOR_UPDATE_LOCK;
+		WREG32(EVERGREEN_CUR_UPDATE + radeon_crtc->crtc_offset, cur_lock);
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		cur_lock = RREG32(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset);
 		if (lock)
 			cur_lock |= AVIVO_D1CURSOR_UPDATE_LOCK;
@@ -58,7 +65,10 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct radeon_device *rdev = crtc->dev->dev_private;
 
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
+		WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT));
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
 		WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
 	} else {
@@ -81,10 +91,14 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct radeon_device *rdev = crtc->dev->dev_private;
 
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
+		WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN |
+		       EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT));
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
 		WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN |
-			     (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+		       (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
 	} else {
 		switch (radeon_crtc->crtc_id) {
 		case 0:
@@ -109,7 +123,10 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct radeon_device *rdev = crtc->dev->dev_private;
 
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, 0);
+		WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr);
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		if (rdev->family >= CHIP_RV770) {
 			if (radeon_crtc->crtc_id)
 				WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, 0);
@@ -169,17 +186,13 @@
 unpin:
 	if (radeon_crtc->cursor_bo) {
 		radeon_gem_object_unpin(radeon_crtc->cursor_bo);
-		mutex_lock(&crtc->dev->struct_mutex);
-		drm_gem_object_unreference(radeon_crtc->cursor_bo);
-		mutex_unlock(&crtc->dev->struct_mutex);
+		drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo);
 	}
 
 	radeon_crtc->cursor_bo = obj;
 	return 0;
 fail:
-	mutex_lock(&crtc->dev->struct_mutex);
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&crtc->dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 
 	return 0;
 }
@@ -201,7 +214,20 @@
 		yorigin = CURSOR_HEIGHT - 1;
 
 	radeon_lock_cursor(crtc, true);
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		/* cursors are offset into the total surface */
+		x += crtc->x;
+		y += crtc->y;
+		DRM_DEBUG("x %d y %d c->x %d c->y %d\n", x, y, crtc->x, crtc->y);
+
+		/* XXX: check if evergreen has the same issues as avivo chips */
+		WREG32(EVERGREEN_CUR_POSITION + radeon_crtc->crtc_offset,
+		       ((xorigin ? 0 : x) << 16) |
+		       (yorigin ? 0 : y));
+		WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin);
+		WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset,
+		       ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1));
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		int w = radeon_crtc->cursor_width;
 		int i = 0;
 		struct drm_crtc *crtc_p;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 768b150..e28e4ed 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -30,6 +30,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/radeon_drm.h>
 #include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "radeon_asic.h"
@@ -100,79 +101,102 @@
 	}
 }
 
-/*
- * MC common functions
+/**
+ * radeon_vram_location - try to find VRAM location
+ * @rdev: radeon device structure holding all necessary informations
+ * @mc: memory controller structure holding memory informations
+ * @base: base address at which to put VRAM
+ *
+ * Function will place try to place VRAM at base address provided
+ * as parameter (which is so far either PCI aperture address or
+ * for IGP TOM base address).
+ *
+ * If there is not enough space to fit the unvisible VRAM in the 32bits
+ * address space then we limit the VRAM size to the aperture.
+ *
+ * If we are using AGP and if the AGP aperture doesn't allow us to have
+ * room for all the VRAM than we restrict the VRAM to the PCI aperture
+ * size and print a warning.
+ *
+ * This function will never fails, worst case are limiting VRAM.
+ *
+ * Note: GTT start, end, size should be initialized before calling this
+ * function on AGP platform.
+ *
+ * Note: We don't explictly enforce VRAM start to be aligned on VRAM size,
+ * this shouldn't be a problem as we are using the PCI aperture as a reference.
+ * Otherwise this would be needed for rv280, all r3xx, and all r4xx, but
+ * not IGP.
+ *
+ * Note: we use mc_vram_size as on some board we need to program the mc to
+ * cover the whole aperture even if VRAM size is inferior to aperture size
+ * Novell bug 204882 + along with lots of ubuntu ones
+ *
+ * Note: when limiting vram it's safe to overwritte real_vram_size because
+ * we are not in case where real_vram_size is inferior to mc_vram_size (ie
+ * note afected by bogus hw of Novell bug 204882 + along with lots of ubuntu
+ * ones)
+ *
+ * Note: IGP TOM addr should be the same as the aperture addr, we don't
+ * explicitly check for that thought.
+ *
+ * FIXME: when reducing VRAM size align new size on power of 2.
  */
-int radeon_mc_setup(struct radeon_device *rdev)
+void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base)
 {
-	uint32_t tmp;
-
-	/* Some chips have an "issue" with the memory controller, the
-	 * location must be aligned to the size. We just align it down,
-	 * too bad if we walk over the top of system memory, we don't
-	 * use DMA without a remapped anyway.
-	 * Affected chips are rv280, all r3xx, and all r4xx, but not IGP
-	 */
-	/* FGLRX seems to setup like this, VRAM a 0, then GART.
-	 */
-	/*
-	 * Note: from R6xx the address space is 40bits but here we only
-	 * use 32bits (still have to see a card which would exhaust 4G
-	 * address space).
-	 */
-	if (rdev->mc.vram_location != 0xFFFFFFFFUL) {
-		/* vram location was already setup try to put gtt after
-		 * if it fits */
-		tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size;
-		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
-		if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) {
-			rdev->mc.gtt_location = tmp;
-		} else {
-			if (rdev->mc.gtt_size >= rdev->mc.vram_location) {
-				printk(KERN_ERR "[drm] GTT too big to fit "
-				       "before or after vram location.\n");
-				return -EINVAL;
-			}
-			rdev->mc.gtt_location = 0;
-		}
-	} else if (rdev->mc.gtt_location != 0xFFFFFFFFUL) {
-		/* gtt location was already setup try to put vram before
-		 * if it fits */
-		if (rdev->mc.mc_vram_size < rdev->mc.gtt_location) {
-			rdev->mc.vram_location = 0;
-		} else {
-			tmp = rdev->mc.gtt_location + rdev->mc.gtt_size;
-			tmp += (rdev->mc.mc_vram_size - 1);
-			tmp &= ~(rdev->mc.mc_vram_size - 1);
-			if ((0xFFFFFFFFUL - tmp) >= rdev->mc.mc_vram_size) {
-				rdev->mc.vram_location = tmp;
-			} else {
-				printk(KERN_ERR "[drm] vram too big to fit "
-				       "before or after GTT location.\n");
-				return -EINVAL;
-			}
-		}
-	} else {
-		rdev->mc.vram_location = 0;
-		tmp = rdev->mc.mc_vram_size;
-		tmp = (tmp + rdev->mc.gtt_size - 1) & ~(rdev->mc.gtt_size - 1);
-		rdev->mc.gtt_location = tmp;
+	mc->vram_start = base;
+	if (mc->mc_vram_size > (0xFFFFFFFF - base + 1)) {
+		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
+		mc->real_vram_size = mc->aper_size;
+		mc->mc_vram_size = mc->aper_size;
 	}
-	rdev->mc.vram_start = rdev->mc.vram_location;
-	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	rdev->mc.gtt_start = rdev->mc.gtt_location;
-	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	DRM_INFO("radeon: VRAM %uM\n", (unsigned)(rdev->mc.mc_vram_size >> 20));
-	DRM_INFO("radeon: VRAM from 0x%08X to 0x%08X\n",
-		 (unsigned)rdev->mc.vram_location,
-		 (unsigned)(rdev->mc.vram_location + rdev->mc.mc_vram_size - 1));
-	DRM_INFO("radeon: GTT %uM\n", (unsigned)(rdev->mc.gtt_size >> 20));
-	DRM_INFO("radeon: GTT from 0x%08X to 0x%08X\n",
-		 (unsigned)rdev->mc.gtt_location,
-		 (unsigned)(rdev->mc.gtt_location + rdev->mc.gtt_size - 1));
-	return 0;
+	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+	if (rdev->flags & RADEON_IS_AGP && mc->vram_end > mc->gtt_start && mc->vram_end <= mc->gtt_end) {
+		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
+		mc->real_vram_size = mc->aper_size;
+		mc->mc_vram_size = mc->aper_size;
+	}
+	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+	dev_info(rdev->dev, "VRAM: %lluM 0x%08llX - 0x%08llX (%lluM used)\n",
+			mc->mc_vram_size >> 20, mc->vram_start,
+			mc->vram_end, mc->real_vram_size >> 20);
 }
 
+/**
+ * radeon_gtt_location - try to find GTT location
+ * @rdev: radeon device structure holding all necessary informations
+ * @mc: memory controller structure holding memory informations
+ *
+ * Function will place try to place GTT before or after VRAM.
+ *
+ * If GTT size is bigger than space left then we ajust GTT size.
+ * Thus function will never fails.
+ *
+ * FIXME: when reducing GTT size align new size on power of 2.
+ */
+void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
+{
+	u64 size_af, size_bf;
+
+	size_af = 0xFFFFFFFF - mc->vram_end;
+	size_bf = mc->vram_start;
+	if (size_bf > size_af) {
+		if (mc->gtt_size > size_bf) {
+			dev_warn(rdev->dev, "limiting GTT\n");
+			mc->gtt_size = size_bf;
+		}
+		mc->gtt_start = mc->vram_start - mc->gtt_size;
+	} else {
+		if (mc->gtt_size > size_af) {
+			dev_warn(rdev->dev, "limiting GTT\n");
+			mc->gtt_size = size_af;
+		}
+		mc->gtt_start = mc->vram_end + 1;
+	}
+	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
+	dev_info(rdev->dev, "GTT: %lluM 0x%08llX - 0x%08llX\n",
+			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
+}
 
 /*
  * GPU helpers function.
@@ -182,7 +206,16 @@
 	uint32_t reg;
 
 	/* first check CRTCs */
-	if (ASIC_IS_AVIVO(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
+			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) |
+			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) |
+			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) |
+			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) |
+			RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET);
+		if (reg & EVERGREEN_CRTC_MASTER_EN)
+			return true;
+	} else if (ASIC_IS_AVIVO(rdev)) {
 		reg = RREG32(AVIVO_D1CRTC_CONTROL) |
 		      RREG32(AVIVO_D2CRTC_CONTROL);
 		if (reg & AVIVO_CRTC_EN) {
@@ -229,6 +262,8 @@
 
 int radeon_dummy_page_init(struct radeon_device *rdev)
 {
+	if (rdev->dummy_page.page)
+		return 0;
 	rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
 	if (rdev->dummy_page.page == NULL)
 		return -ENOMEM;
@@ -310,7 +345,7 @@
 		rdev->mc_rreg = &rs600_mc_rreg;
 		rdev->mc_wreg = &rs600_mc_wreg;
 	}
-	if (rdev->family >= CHIP_R600) {
+	if ((rdev->family >= CHIP_R600) && (rdev->family <= CHIP_RV740)) {
 		rdev->pciep_rreg = &r600_pciep_rreg;
 		rdev->pciep_wreg = &r600_pciep_wreg;
 	}
@@ -329,21 +364,22 @@
 	case CHIP_RS100:
 	case CHIP_RV200:
 	case CHIP_RS200:
+		rdev->asic = &r100_asic;
+		break;
 	case CHIP_R200:
 	case CHIP_RV250:
 	case CHIP_RS300:
 	case CHIP_RV280:
-		rdev->asic = &r100_asic;
+		rdev->asic = &r200_asic;
 		break;
 	case CHIP_R300:
 	case CHIP_R350:
 	case CHIP_RV350:
 	case CHIP_RV380:
-		rdev->asic = &r300_asic;
-		if (rdev->flags & RADEON_IS_PCIE) {
-			rdev->asic->gart_tlb_flush = &rv370_pcie_gart_tlb_flush;
-			rdev->asic->gart_set_page = &rv370_pcie_gart_set_page;
-		}
+		if (rdev->flags & RADEON_IS_PCIE)
+			rdev->asic = &r300_asic_pcie;
+		else
+			rdev->asic = &r300_asic;
 		break;
 	case CHIP_R420:
 	case CHIP_R423:
@@ -387,6 +423,13 @@
 	case CHIP_RV740:
 		rdev->asic = &rv770_asic;
 		break;
+	case CHIP_CEDAR:
+	case CHIP_REDWOOD:
+	case CHIP_JUNIPER:
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		rdev->asic = &evergreen_asic;
+		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
@@ -613,6 +656,36 @@
 	}
 }
 
+static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct radeon_device *rdev = dev->dev_private;
+	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
+	if (state == VGA_SWITCHEROO_ON) {
+		printk(KERN_INFO "radeon: switched on\n");
+		/* don't suspend or resume card normally */
+		rdev->powered_down = false;
+		radeon_resume_kms(dev);
+	} else {
+		printk(KERN_INFO "radeon: switched off\n");
+		radeon_suspend_kms(dev, pmm);
+		/* don't suspend or resume card normally */
+		rdev->powered_down = true;
+	}
+}
+
+static bool radeon_switcheroo_can_switch(struct pci_dev *pdev)
+{
+	struct drm_device *dev = pci_get_drvdata(pdev);
+	bool can_switch;
+
+	spin_lock(&dev->count_lock);
+	can_switch = (dev->open_count == 0);
+	spin_unlock(&dev->count_lock);
+	return can_switch;
+}
+
+
 int radeon_device_init(struct radeon_device *rdev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
@@ -638,11 +711,14 @@
 	mutex_init(&rdev->cs_mutex);
 	mutex_init(&rdev->ib_pool.mutex);
 	mutex_init(&rdev->cp.mutex);
+	mutex_init(&rdev->dc_hw_i2c_mutex);
 	if (rdev->family >= CHIP_R600)
 		spin_lock_init(&rdev->ih.lock);
 	mutex_init(&rdev->gem.mutex);
+	mutex_init(&rdev->pm.mutex);
 	rwlock_init(&rdev->fence_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
+	init_waitqueue_head(&rdev->irq.vblank_queue);
 
 	/* setup workqueue */
 	rdev->wq = create_workqueue("radeon");
@@ -692,6 +768,9 @@
 	/* this will fail for cards that aren't VGA class devices, just
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
+	vga_switcheroo_register_client(rdev->pdev,
+				       radeon_switcheroo_set_state,
+				       radeon_switcheroo_can_switch);
 
 	r = radeon_init(rdev);
 	if (r)
@@ -723,6 +802,7 @@
 	rdev->shutdown = true;
 	radeon_fini(rdev);
 	destroy_workqueue(rdev->wq);
+	vga_switcheroo_unregister_client(rdev->pdev);
 	vga_client_register(rdev->pdev, NULL, NULL, NULL);
 	iounmap(rdev->rmmio);
 	rdev->rmmio = NULL;
@@ -746,6 +826,8 @@
 	}
 	rdev = dev->dev_private;
 
+	if (rdev->powered_down)
+		return 0;
 	/* unpin the front buffers */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->fb);
@@ -791,6 +873,9 @@
 {
 	struct radeon_device *rdev = dev->dev_private;
 
+	if (rdev->powered_down)
+		return 0;
+
 	acquire_console_sem();
 	pci_set_power_state(dev->pdev, PCI_D0);
 	pci_restore_state(dev->pdev);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 7e17a36..ba8d806 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -68,6 +68,36 @@
 	WREG32(AVIVO_D1GRPH_LUT_SEL + radeon_crtc->crtc_offset, radeon_crtc->crtc_id);
 }
 
+static void evergreen_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int i;
+
+	DRM_DEBUG("%d\n", radeon_crtc->crtc_id);
+	WREG32(EVERGREEN_DC_LUT_CONTROL + radeon_crtc->crtc_offset, 0);
+
+	WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN + radeon_crtc->crtc_offset, 0);
+	WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_RED + radeon_crtc->crtc_offset, 0);
+
+	WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE + radeon_crtc->crtc_offset, 0xffff);
+	WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN + radeon_crtc->crtc_offset, 0xffff);
+	WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_RED + radeon_crtc->crtc_offset, 0xffff);
+
+	WREG32(EVERGREEN_DC_LUT_RW_MODE, radeon_crtc->crtc_id);
+	WREG32(EVERGREEN_DC_LUT_WRITE_EN_MASK, 0x00000007);
+
+	WREG32(EVERGREEN_DC_LUT_RW_INDEX, 0);
+	for (i = 0; i < 256; i++) {
+		WREG32(EVERGREEN_DC_LUT_30_COLOR,
+		       (radeon_crtc->lut_r[i] << 20) |
+		       (radeon_crtc->lut_g[i] << 10) |
+		       (radeon_crtc->lut_b[i] << 0));
+	}
+}
+
 static void legacy_crtc_load_lut(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
@@ -100,7 +130,9 @@
 	if (!crtc->enabled)
 		return;
 
-	if (ASIC_IS_AVIVO(rdev))
+	if (ASIC_IS_DCE4(rdev))
+		evergreen_crtc_load_lut(crtc);
+	else if (ASIC_IS_AVIVO(rdev))
 		avivo_crtc_load_lut(crtc);
 	else
 		legacy_crtc_load_lut(crtc);
@@ -361,6 +393,8 @@
 
 int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
 {
+	struct drm_device *dev = radeon_connector->base.dev;
+	struct radeon_device *rdev = dev->dev_private;
 	int ret = 0;
 
 	if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
@@ -373,11 +407,11 @@
 	if (!radeon_connector->ddc_bus)
 		return -1;
 	if (!radeon_connector->edid) {
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 		radeon_connector->edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
-		radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 	}
-
+	/* some servers provide a hardcoded edid in rom for KVMs */
+	if (!radeon_connector->edid)
+		radeon_connector->edid = radeon_combios_get_hardcoded_edid(rdev);
 	if (radeon_connector->edid) {
 		drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid);
 		ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid);
@@ -395,9 +429,7 @@
 
 	if (!radeon_connector->ddc_bus)
 		return -1;
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 1);
 	edid = drm_get_edid(connector, &radeon_connector->ddc_bus->adapter);
-	radeon_i2c_do_lock(radeon_connector->ddc_bus, 0);
 	if (edid) {
 		kfree(edid);
 	}
@@ -414,13 +446,13 @@
 	return n;
 }
 
-void radeon_compute_pll(struct radeon_pll *pll,
-			uint64_t freq,
-			uint32_t *dot_clock_p,
-			uint32_t *fb_div_p,
-			uint32_t *frac_fb_div_p,
-			uint32_t *ref_div_p,
-			uint32_t *post_div_p)
+static void radeon_compute_pll_legacy(struct radeon_pll *pll,
+				      uint64_t freq,
+				      uint32_t *dot_clock_p,
+				      uint32_t *fb_div_p,
+				      uint32_t *frac_fb_div_p,
+				      uint32_t *ref_div_p,
+				      uint32_t *post_div_p)
 {
 	uint32_t min_ref_div = pll->min_ref_div;
 	uint32_t max_ref_div = pll->max_ref_div;
@@ -580,95 +612,194 @@
 	*post_div_p = best_post_div;
 }
 
-void radeon_compute_pll_avivo(struct radeon_pll *pll,
-			      uint64_t freq,
-			      uint32_t *dot_clock_p,
-			      uint32_t *fb_div_p,
-			      uint32_t *frac_fb_div_p,
-			      uint32_t *ref_div_p,
-			      uint32_t *post_div_p)
+static bool
+calc_fb_div(struct radeon_pll *pll,
+	    uint32_t freq,
+            uint32_t post_div,
+            uint32_t ref_div,
+            uint32_t *fb_div,
+            uint32_t *fb_div_frac)
 {
-	fixed20_12 m, n, frac_n, p, f_vco, f_pclk, best_freq;
-	fixed20_12 pll_out_max, pll_out_min;
-	fixed20_12 pll_in_max, pll_in_min;
-	fixed20_12 reference_freq;
-	fixed20_12 error, ffreq, a, b;
+	fixed20_12 feedback_divider, a, b;
+	u32 vco_freq;
 
-	pll_out_max.full = rfixed_const(pll->pll_out_max);
-	pll_out_min.full = rfixed_const(pll->pll_out_min);
-	pll_in_max.full = rfixed_const(pll->pll_in_max);
-	pll_in_min.full = rfixed_const(pll->pll_in_min);
-	reference_freq.full = rfixed_const(pll->reference_freq);
-	do_div(freq, 10);
+	vco_freq = freq * post_div;
+	/* feedback_divider = vco_freq * ref_div / pll->reference_freq; */
+	a.full = rfixed_const(pll->reference_freq);
+	feedback_divider.full = rfixed_const(vco_freq);
+	feedback_divider.full = rfixed_div(feedback_divider, a);
+	a.full = rfixed_const(ref_div);
+	feedback_divider.full = rfixed_mul(feedback_divider, a);
+
+	if (pll->flags & RADEON_PLL_USE_FRAC_FB_DIV) {
+		/* feedback_divider = floor((feedback_divider * 10.0) + 0.5) * 0.1; */
+		a.full = rfixed_const(10);
+		feedback_divider.full = rfixed_mul(feedback_divider, a);
+		feedback_divider.full += rfixed_const_half(0);
+		feedback_divider.full = rfixed_floor(feedback_divider);
+		feedback_divider.full = rfixed_div(feedback_divider, a);
+
+		/* *fb_div = floor(feedback_divider); */
+		a.full = rfixed_floor(feedback_divider);
+		*fb_div = rfixed_trunc(a);
+		/* *fb_div_frac = fmod(feedback_divider, 1.0) * 10.0; */
+		a.full = rfixed_const(10);
+		b.full = rfixed_mul(feedback_divider, a);
+
+		feedback_divider.full = rfixed_floor(feedback_divider);
+		feedback_divider.full = rfixed_mul(feedback_divider, a);
+		feedback_divider.full = b.full - feedback_divider.full;
+		*fb_div_frac = rfixed_trunc(feedback_divider);
+	} else {
+		/* *fb_div = floor(feedback_divider + 0.5); */
+		feedback_divider.full += rfixed_const_half(0);
+		feedback_divider.full = rfixed_floor(feedback_divider);
+
+		*fb_div = rfixed_trunc(feedback_divider);
+		*fb_div_frac = 0;
+	}
+
+	if (((*fb_div) < pll->min_feedback_div) || ((*fb_div) > pll->max_feedback_div))
+		return false;
+	else
+		return true;
+}
+
+static bool
+calc_fb_ref_div(struct radeon_pll *pll,
+		uint32_t freq,
+		uint32_t post_div,
+		uint32_t *fb_div,
+                uint32_t *fb_div_frac,
+                uint32_t *ref_div)
+{
+	fixed20_12 ffreq, max_error, error, pll_out, a;
+	u32 vco;
+
 	ffreq.full = rfixed_const(freq);
-	error.full = rfixed_const(100 * 100);
+	/* max_error = ffreq * 0.0025; */
+	a.full = rfixed_const(400);
+	max_error.full = rfixed_div(ffreq, a);
 
-	/* max p */
-	p.full = rfixed_div(pll_out_max, ffreq);
-	p.full = rfixed_floor(p);
+	for ((*ref_div) = pll->min_ref_div; (*ref_div) < pll->max_ref_div; ++(*ref_div)) {
+		if (calc_fb_div(pll, freq, post_div, (*ref_div), fb_div, fb_div_frac)) {
+			vco = pll->reference_freq * (((*fb_div) * 10) + (*fb_div_frac));
+			vco = vco / ((*ref_div) * 10);
 
-	/* min m */
-	m.full = rfixed_div(reference_freq, pll_in_max);
-	m.full = rfixed_ceil(m);
+			if ((vco < pll->pll_out_min) || (vco > pll->pll_out_max))
+				continue;
 
-	while (1) {
-		n.full = rfixed_div(ffreq, reference_freq);
-		n.full = rfixed_mul(n, m);
-		n.full = rfixed_mul(n, p);
+			/* pll_out = vco / post_div; */
+			a.full = rfixed_const(post_div);
+			pll_out.full = rfixed_const(vco);
+			pll_out.full = rfixed_div(pll_out, a);
 
-		f_vco.full = rfixed_div(n, m);
-		f_vco.full = rfixed_mul(f_vco, reference_freq);
+			if (pll_out.full >= ffreq.full) {
+				error.full = pll_out.full - ffreq.full;
+				if (error.full <= max_error.full)
+					return true;
+			}
+		}
+	}
+	return false;
+}
 
-		f_pclk.full = rfixed_div(f_vco, p);
+static void radeon_compute_pll_new(struct radeon_pll *pll,
+				   uint64_t freq,
+				   uint32_t *dot_clock_p,
+				   uint32_t *fb_div_p,
+				   uint32_t *frac_fb_div_p,
+				   uint32_t *ref_div_p,
+				   uint32_t *post_div_p)
+{
+	u32 fb_div = 0, fb_div_frac = 0, post_div = 0, ref_div = 0;
+	u32 best_freq = 0, vco_frequency;
 
-		if (f_pclk.full > ffreq.full)
-			error.full = f_pclk.full - ffreq.full;
-		else
-			error.full = ffreq.full - f_pclk.full;
-		error.full = rfixed_div(error, f_pclk);
-		a.full = rfixed_const(100 * 100);
-		error.full = rfixed_mul(error, a);
+	/* freq = freq / 10; */
+	do_div(freq, 10);
 
-		a.full = rfixed_mul(m, p);
-		a.full = rfixed_div(n, a);
-		best_freq.full = rfixed_mul(reference_freq, a);
+	if (pll->flags & RADEON_PLL_USE_POST_DIV) {
+		post_div = pll->post_div;
+		if ((post_div < pll->min_post_div) || (post_div > pll->max_post_div))
+			goto done;
 
-		if (rfixed_trunc(error) < 25)
-			break;
+		vco_frequency = freq * post_div;
+		if ((vco_frequency < pll->pll_out_min) || (vco_frequency > pll->pll_out_max))
+			goto done;
 
-		a.full = rfixed_const(1);
-		m.full = m.full + a.full;
-		a.full = rfixed_div(reference_freq, m);
-		if (a.full >= pll_in_min.full)
-			continue;
+		if (pll->flags & RADEON_PLL_USE_REF_DIV) {
+			ref_div = pll->reference_div;
+			if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
+				goto done;
+			if (!calc_fb_div(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac))
+				goto done;
+		}
+	} else {
+		for (post_div = pll->max_post_div; post_div >= pll->min_post_div; --post_div) {
+			if (pll->flags & RADEON_PLL_LEGACY) {
+				if ((post_div == 5) ||
+				    (post_div == 7) ||
+				    (post_div == 9) ||
+				    (post_div == 10) ||
+				    (post_div == 11))
+					continue;
+			}
 
-		m.full = rfixed_div(reference_freq, pll_in_max);
-		m.full = rfixed_ceil(m);
-		a.full= rfixed_const(1);
-		p.full = p.full - a.full;
-		a.full = rfixed_mul(p, ffreq);
-		if (a.full >= pll_out_min.full)
-			continue;
-		else {
-			DRM_ERROR("Unable to find pll dividers\n");
-			break;
+			if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
+				continue;
+
+			vco_frequency = freq * post_div;
+			if ((vco_frequency < pll->pll_out_min) || (vco_frequency > pll->pll_out_max))
+				continue;
+			if (pll->flags & RADEON_PLL_USE_REF_DIV) {
+				ref_div = pll->reference_div;
+				if ((ref_div < pll->min_ref_div) || (ref_div > pll->max_ref_div))
+					goto done;
+				if (calc_fb_div(pll, freq, post_div, ref_div, &fb_div, &fb_div_frac))
+					break;
+			} else {
+				if (calc_fb_ref_div(pll, freq, post_div, &fb_div, &fb_div_frac, &ref_div))
+					break;
+			}
 		}
 	}
 
-	a.full = rfixed_const(10);
-	b.full = rfixed_mul(n, a);
+	best_freq = pll->reference_freq * 10 * fb_div;
+	best_freq += pll->reference_freq * fb_div_frac;
+	best_freq = best_freq / (ref_div * post_div);
 
-	frac_n.full = rfixed_floor(n);
-	frac_n.full = rfixed_mul(frac_n, a);
-	frac_n.full = b.full - frac_n.full;
+done:
+	if (best_freq == 0)
+		DRM_ERROR("Couldn't find valid PLL dividers\n");
 
-	*dot_clock_p = rfixed_trunc(best_freq);
-	*fb_div_p = rfixed_trunc(n);
-	*frac_fb_div_p = rfixed_trunc(frac_n);
-	*ref_div_p = rfixed_trunc(m);
-	*post_div_p = rfixed_trunc(p);
+	*dot_clock_p = best_freq / 10;
+	*fb_div_p = fb_div;
+	*frac_fb_div_p = fb_div_frac;
+	*ref_div_p = ref_div;
+	*post_div_p = post_div;
 
-	DRM_DEBUG("%u %d.%d, %d, %d\n", *dot_clock_p * 10, *fb_div_p, *frac_fb_div_p, *ref_div_p, *post_div_p);
+	DRM_DEBUG("%u %d.%d, %d, %d\n", *dot_clock_p, *fb_div_p, *frac_fb_div_p, *ref_div_p, *post_div_p);
+}
+
+void radeon_compute_pll(struct radeon_pll *pll,
+			uint64_t freq,
+			uint32_t *dot_clock_p,
+			uint32_t *fb_div_p,
+			uint32_t *frac_fb_div_p,
+			uint32_t *ref_div_p,
+			uint32_t *post_div_p)
+{
+	switch (pll->algo) {
+	case PLL_ALGO_NEW:
+		radeon_compute_pll_new(pll, freq, dot_clock_p, fb_div_p,
+				       frac_fb_div_p, ref_div_p, post_div_p);
+		break;
+	case PLL_ALGO_LEGACY:
+	default:
+		radeon_compute_pll_legacy(pll, freq, dot_clock_p, fb_div_p,
+					  frac_fb_div_p, ref_div_p, post_div_p);
+		break;
+	}
 }
 
 static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
@@ -679,11 +810,8 @@
 	if (fb->fbdev)
 		radeonfb_remove(dev, fb);
 
-	if (radeon_fb->obj) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(radeon_fb->obj);
-		mutex_unlock(&dev->struct_mutex);
-	}
+	if (radeon_fb->obj)
+		drm_gem_object_unreference_unlocked(radeon_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(radeon_fb);
 }
@@ -819,7 +947,7 @@
 
 int radeon_modeset_init(struct radeon_device *rdev)
 {
-	int num_crtc = 2, i;
+	int i;
 	int ret;
 
 	drm_mode_config_init(rdev->ddev);
@@ -842,11 +970,23 @@
 		return ret;
 	}
 
+	/* check combios for a valid hardcoded EDID - Sun servers */
+	if (!rdev->is_atom_bios) {
+		/* check for hardcoded EDID in BIOS */
+		radeon_combios_check_hardcoded_edid(rdev);
+	}
+
 	if (rdev->flags & RADEON_SINGLE_CRTC)
-		num_crtc = 1;
+		rdev->num_crtc = 1;
+	else {
+		if (ASIC_IS_DCE4(rdev))
+			rdev->num_crtc = 6;
+		else
+			rdev->num_crtc = 2;
+	}
 
 	/* allocate crtcs */
-	for (i = 0; i < num_crtc; i++) {
+	for (i = 0; i < rdev->num_crtc; i++) {
 		radeon_crtc_init(rdev->ddev, i);
 	}
 
@@ -863,6 +1003,8 @@
 
 void radeon_modeset_fini(struct radeon_device *rdev)
 {
+	kfree(rdev->mode_info.bios_hardcoded_edid);
+
 	if (rdev->mode_info.mode_config_initialized) {
 		radeon_hpd_fini(rdev);
 		drm_mode_config_cleanup(rdev->ddev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8ba3de7..6eec0ec 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -40,9 +40,11 @@
 
 /*
  * KMS wrapper.
+ * - 2.0.0 - initial interface
+ * - 2.1.0 - add square tiling interface
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	0
+#define KMS_DRIVER_MINOR	1
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -86,7 +88,8 @@
 int radeon_testing = 0;
 int radeon_connector_table = 0;
 int radeon_tv = 1;
-int radeon_new_pll = 1;
+int radeon_new_pll = -1;
+int radeon_dynpm = -1;
 int radeon_audio = 1;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
@@ -122,9 +125,12 @@
 MODULE_PARM_DESC(tv, "TV enable (0 = disable)");
 module_param_named(tv, radeon_tv, int, 0444);
 
-MODULE_PARM_DESC(new_pll, "Select new PLL code for AVIVO chips");
+MODULE_PARM_DESC(new_pll, "Select new PLL code");
 module_param_named(new_pll, radeon_new_pll, int, 0444);
 
+MODULE_PARM_DESC(dynpm, "Disable/Enable dynamic power management (1 = enable)");
+module_param_named(dynpm, radeon_dynpm, int, 0444);
+
 MODULE_PARM_DESC(audio, "Audio enable (0 = disable)");
 module_param_named(audio, radeon_audio, int, 0444);
 
@@ -339,6 +345,7 @@
 		driver = &kms_driver;
 		driver->driver_features |= DRIVER_MODESET;
 		driver->num_ioctls = radeon_max_kms_ioctl;
+		radeon_register_atpx_handler();
 	}
 	/* if the vga console setting is enabled still
 	 * let modprobe override it */
@@ -348,6 +355,7 @@
 static void __exit radeon_exit(void)
 {
 	drm_exit(driver);
+	radeon_unregister_atpx_handler();
 }
 
 module_init(radeon_init);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index c57ad60..ec55f2b 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -268,6 +268,8 @@
 
 	u32 scratch_ages[5];
 
+	int have_z_offset;
+
 	/* starting from here on, data is preserved accross an open */
 	uint32_t flags;		/* see radeon_chip_flags */
 	resource_size_t fb_aper_offset;
@@ -295,6 +297,9 @@
 	int r700_sc_prim_fifo_size;
 	int r700_sc_hiz_tile_fifo_size;
 	int r700_sc_earlyz_tile_fifo_fize;
+	int r600_group_size;
+	int r600_npipes;
+	int r600_nbanks;
 
 	struct mutex cs_mutex;
 	u32 cs_id_scnt;
@@ -310,9 +315,11 @@
 	u32 age;
 } drm_radeon_buf_priv_t;
 
+struct drm_buffer;
+
 typedef struct drm_radeon_kcmd_buffer {
 	int bufsz;
-	char *buf;
+	struct drm_buffer *buffer;
 	int nbox;
 	struct drm_clip_rect __user *boxes;
 } drm_radeon_kcmd_buffer_t;
@@ -455,6 +462,15 @@
 			   int sx, int sy, int dx, int dy,
 			   int w, int h, int src_pitch, int dst_pitch, int cpp);
 
+/* atpx handler */
+#if defined(CONFIG_VGA_SWITCHEROO)
+void radeon_register_atpx_handler(void);
+void radeon_unregister_atpx_handler(void);
+#else
+static inline void radeon_register_atpx_handler(void) {}
+static inline void radeon_unregister_atpx_handler(void) {}
+#endif
+
 /* Flags for stats.boxes
  */
 #define RADEON_BOX_DMA_IDLE      0x1
@@ -2122,4 +2138,32 @@
 	write &= mask;						\
 } while (0)
 
+/**
+ * Copy given number of dwords from drm buffer to the ring buffer.
+ */
+#define OUT_RING_DRM_BUFFER(buf, sz) do {				\
+	int _size = (sz) * 4;						\
+	struct drm_buffer *_buf = (buf);				\
+	int _part_size;							\
+	while (_size > 0) {						\
+		_part_size = _size;					\
+									\
+		if (write + _part_size/4 > mask)			\
+			_part_size = ((mask + 1) - write)*4;		\
+									\
+		if (drm_buffer_index(_buf) + _part_size > PAGE_SIZE)	\
+			_part_size = PAGE_SIZE - drm_buffer_index(_buf);\
+									\
+									\
+									\
+		memcpy(ring + write, &_buf->data[drm_buffer_page(_buf)]	\
+			[drm_buffer_index(_buf)], _part_size);		\
+									\
+		_size -= _part_size;					\
+		write = (write + _part_size/4) & mask;			\
+		drm_buffer_advance(_buf, _part_size);			\
+	}								\
+} while (0)
+
+
 #endif				/* __RADEON_DRV_H__ */
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index 3c91724..bc926ea 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -53,7 +53,7 @@
 	/* DVO requires 2x ppll clocks depending on tmds chip */
 	if (radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT)
 		return index_mask;
-	
+
 	count = -1;
 	list_for_each_entry(clone_encoder, &dev->mode_config.encoder_list, head) {
 		struct radeon_encoder *radeon_clone = to_radeon_encoder(clone_encoder);
@@ -228,6 +228,32 @@
 	return NULL;
 }
 
+static struct radeon_connector_atom_dig *
+radeon_get_atom_connector_priv_from_encoder(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *dig_connector;
+
+	if (!rdev->is_atom_bios)
+		return NULL;
+
+	connector = radeon_get_connector_for_encoder(encoder);
+	if (!connector)
+		return NULL;
+
+	radeon_connector = to_radeon_connector(connector);
+
+	if (!radeon_connector->con_priv)
+		return NULL;
+
+	dig_connector = radeon_connector->con_priv;
+
+	return dig_connector;
+}
+
 static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *adjusted_mode)
@@ -236,6 +262,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
+	/* adjust pm to upcoming mode change */
+	radeon_pm_compute_clocks(rdev);
+
 	/* set the active encoder to connector routing */
 	radeon_encoder_set_active_device(encoder);
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
@@ -458,34 +487,20 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct radeon_connector_atom_dig *dig_connector =
+		radeon_get_atom_connector_priv_from_encoder(encoder);
 	union lvds_encoder_control args;
 	int index = 0;
 	int hdmi_detected = 0;
 	uint8_t frev, crev;
-	struct radeon_encoder_atom_dig *dig;
-	struct drm_connector *connector;
-	struct radeon_connector *radeon_connector;
-	struct radeon_connector_atom_dig *dig_connector;
 
-	connector = radeon_get_connector_for_encoder(encoder);
-	if (!connector)
+	if (!dig || !dig_connector)
 		return;
 
-	radeon_connector = to_radeon_connector(connector);
-
-	if (!radeon_encoder->enc_priv)
-		return;
-
-	dig = radeon_encoder->enc_priv;
-
-	if (!radeon_connector->con_priv)
-		return;
-
-	if (drm_detect_hdmi_monitor(radeon_connector->edid))
+	if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI)
 		hdmi_detected = 1;
 
-	dig_connector = radeon_connector->con_priv;
-
 	memset(&args, 0, sizeof(args));
 
 	switch (radeon_encoder->encoder_id) {
@@ -586,7 +601,7 @@
 {
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector;
-	struct radeon_connector_atom_dig *radeon_dig_connector;
+	struct radeon_connector_atom_dig *dig_connector;
 
 	connector = radeon_get_connector_for_encoder(encoder);
 	if (!connector)
@@ -617,9 +632,9 @@
 		break;
 	case DRM_MODE_CONNECTOR_DisplayPort:
 	case DRM_MODE_CONNECTOR_eDP:
-		radeon_dig_connector = radeon_connector->con_priv;
-		if ((radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
-		    (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
+		dig_connector = radeon_connector->con_priv;
+		if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+		    (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
 			return ATOM_ENCODER_MODE_DP;
 		else if (drm_detect_hdmi_monitor(radeon_connector->edid))
 			return ATOM_ENCODER_MODE_HDMI;
@@ -656,6 +671,18 @@
  * - 2 DIG encoder blocks.
  * DIG1/2 can drive UNIPHY0/1/2 link A or link B
  *
+ * DCE 4.0
+ * - 3 DIG transmitter blocks UNPHY0/1/2 (links A and B).
+ * Supports up to 6 digital outputs
+ * - 6 DIG encoder blocks.
+ * - DIG to PHY mapping is hardcoded
+ * DIG1 drives UNIPHY0 link A, A+B
+ * DIG2 drives UNIPHY0 link B
+ * DIG3 drives UNIPHY1 link A, A+B
+ * DIG4 drives UNIPHY1 link B
+ * DIG5 drives UNIPHY2 link A, A+B
+ * DIG6 drives UNIPHY2 link B
+ *
  * Routing
  * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links)
  * Examples:
@@ -664,88 +691,78 @@
  * crtc0 -> dig1 -> UNIPHY2 link  A   -> LVDS
  * crtc1 -> dig2 -> UNIPHY1 link  B+A -> TMDS/HDMI
  */
-static void
+
+union dig_encoder_control {
+	DIG_ENCODER_CONTROL_PS_ALLOCATION v1;
+	DIG_ENCODER_CONTROL_PARAMETERS_V2 v2;
+	DIG_ENCODER_CONTROL_PARAMETERS_V3 v3;
+};
+
+void
 atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	DIG_ENCODER_CONTROL_PS_ALLOCATION args;
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct radeon_connector_atom_dig *dig_connector =
+		radeon_get_atom_connector_priv_from_encoder(encoder);
+	union dig_encoder_control args;
 	int index = 0, num = 0;
 	uint8_t frev, crev;
-	struct radeon_encoder_atom_dig *dig;
-	struct drm_connector *connector;
-	struct radeon_connector *radeon_connector;
-	struct radeon_connector_atom_dig *dig_connector;
 
-	connector = radeon_get_connector_for_encoder(encoder);
-	if (!connector)
+	if (!dig || !dig_connector)
 		return;
 
-	radeon_connector = to_radeon_connector(connector);
-
-	if (!radeon_connector->con_priv)
-		return;
-
-	dig_connector = radeon_connector->con_priv;
-
-	if (!radeon_encoder->enc_priv)
-		return;
-
-	dig = radeon_encoder->enc_priv;
-
 	memset(&args, 0, sizeof(args));
 
-	if (dig->dig_encoder)
-		index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl);
-	else
-		index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl);
+	if (ASIC_IS_DCE4(rdev))
+		index = GetIndexIntoMasterTable(COMMAND, DIGxEncoderControl);
+	else {
+		if (dig->dig_encoder)
+			index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl);
+		else
+			index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl);
+	}
 	num = dig->dig_encoder + 1;
 
 	atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev);
 
-	args.ucAction = action;
-	args.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+	args.v1.ucAction = action;
+	args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
+	args.v1.ucEncoderMode = atombios_get_encoder_mode(encoder);
 
-	if (ASIC_IS_DCE32(rdev)) {
-		switch (radeon_encoder->encoder_id) {
-		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
-			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
-			break;
-		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
-			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2;
-			break;
-		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
-			args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3;
-			break;
-		}
+	if (args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP) {
+		if (dig_connector->dp_clock == 270000)
+			args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+		args.v1.ucLaneNum = dig_connector->dp_lane_count;
+	} else if (radeon_encoder->pixel_clock > 165000)
+		args.v1.ucLaneNum = 8;
+	else
+		args.v1.ucLaneNum = 4;
+
+	if (ASIC_IS_DCE4(rdev)) {
+		args.v3.acConfig.ucDigSel = dig->dig_encoder;
+		args.v3.ucBitPerColor = PANEL_8BIT_PER_COLOR;
 	} else {
 		switch (radeon_encoder->encoder_id) {
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
-			args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER1;
+			args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
 			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-			args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER2;
+			args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3;
 			break;
 		}
+		if (dig_connector->linkb)
+			args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
+		else
+			args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
 	}
 
-	args.ucEncoderMode = atombios_get_encoder_mode(encoder);
-
-	if (args.ucEncoderMode == ATOM_ENCODER_MODE_DP) {
-		if (dig_connector->dp_clock == 270000)
-			args.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
-		args.ucLaneNum = dig_connector->dp_lane_count;
-	} else if (radeon_encoder->pixel_clock > 165000)
-		args.ucLaneNum = 8;
-	else
-		args.ucLaneNum = 4;
-
-	if (dig_connector->linkb)
-		args.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
-	else
-		args.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
-
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 
 }
@@ -753,6 +770,7 @@
 union dig_transmitter_control {
 	DIG_TRANSMITTER_CONTROL_PS_ALLOCATION v1;
 	DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2;
+	DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3;
 };
 
 void
@@ -761,37 +779,29 @@
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct radeon_connector_atom_dig *dig_connector =
+		radeon_get_atom_connector_priv_from_encoder(encoder);
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
 	union dig_transmitter_control args;
 	int index = 0, num = 0;
 	uint8_t frev, crev;
-	struct radeon_encoder_atom_dig *dig;
-	struct drm_connector *connector;
-	struct radeon_connector *radeon_connector;
-	struct radeon_connector_atom_dig *dig_connector;
 	bool is_dp = false;
+	int pll_id = 0;
+
+	if (!dig || !dig_connector)
+		return;
 
 	connector = radeon_get_connector_for_encoder(encoder);
-	if (!connector)
-		return;
-
 	radeon_connector = to_radeon_connector(connector);
 
-	if (!radeon_encoder->enc_priv)
-		return;
-
-	dig = radeon_encoder->enc_priv;
-
-	if (!radeon_connector->con_priv)
-		return;
-
-	dig_connector = radeon_connector->con_priv;
-
 	if (atombios_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_DP)
 		is_dp = true;
 
 	memset(&args, 0, sizeof(args));
 
-	if (ASIC_IS_DCE32(rdev))
+	if (ASIC_IS_DCE32(rdev) || ASIC_IS_DCE4(rdev))
 		index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl);
 	else {
 		switch (radeon_encoder->encoder_id) {
@@ -821,7 +831,54 @@
 		else
 			args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
 	}
-	if (ASIC_IS_DCE32(rdev)) {
+	if (ASIC_IS_DCE4(rdev)) {
+		if (is_dp)
+			args.v3.ucLaneNum = dig_connector->dp_lane_count;
+		else if (radeon_encoder->pixel_clock > 165000)
+			args.v3.ucLaneNum = 8;
+		else
+			args.v3.ucLaneNum = 4;
+
+		if (dig_connector->linkb) {
+			args.v3.acConfig.ucLinkSel = 1;
+			args.v3.acConfig.ucEncoderSel = 1;
+		}
+
+		/* Select the PLL for the PHY
+		 * DP PHY should be clocked from external src if there is
+		 * one.
+		 */
+		if (encoder->crtc) {
+			struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
+			pll_id = radeon_crtc->pll_id;
+		}
+		if (is_dp && rdev->clock.dp_extclk)
+			args.v3.acConfig.ucRefClkSource = 2; /* external src */
+		else
+			args.v3.acConfig.ucRefClkSource = pll_id;
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			args.v3.acConfig.ucTransmitterSel = 0;
+			num = 0;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			args.v3.acConfig.ucTransmitterSel = 1;
+			num = 1;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			args.v3.acConfig.ucTransmitterSel = 2;
+			num = 2;
+			break;
+		}
+
+		if (is_dp)
+			args.v3.acConfig.fCoherentMode = 1; /* DP requires coherent */
+		else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+			if (dig->coherent_mode)
+				args.v3.acConfig.fCoherentMode = 1;
+		}
+	} else if (ASIC_IS_DCE32(rdev)) {
 		if (dig->dig_encoder == 1)
 			args.v2.acConfig.ucEncoderSel = 1;
 		if (dig_connector->linkb)
@@ -849,7 +906,6 @@
 				args.v2.acConfig.fCoherentMode = 1;
 		}
 	} else {
-
 		args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL;
 
 		if (dig->dig_encoder)
@@ -1024,9 +1080,12 @@
 		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 	}
 	radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
-union crtc_sourc_param {
+union crtc_source_param {
 	SELECT_CRTC_SOURCE_PS_ALLOCATION v1;
 	SELECT_CRTC_SOURCE_PARAMETERS_V2 v2;
 };
@@ -1038,7 +1097,7 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
-	union crtc_sourc_param args;
+	union crtc_source_param args;
 	int index = GetIndexIntoMasterTable(COMMAND, SelectCRTC_Source);
 	uint8_t frev, crev;
 	struct radeon_encoder_atom_dig *dig;
@@ -1107,10 +1166,26 @@
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
 			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 				dig = radeon_encoder->enc_priv;
-				if (dig->dig_encoder)
-					args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
-				else
+				switch (dig->dig_encoder) {
+				case 0:
 					args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID;
+					break;
+				case 1:
+					args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID;
+					break;
+				case 2:
+					args.v2.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID;
+					break;
+				case 3:
+					args.v2.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID;
+					break;
+				case 4:
+					args.v2.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID;
+					break;
+				case 5:
+					args.v2.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID;
+					break;
+				}
 				break;
 			case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
 				args.v2.ucEncoderID = ASIC_INT_DVO_ENCODER_ID;
@@ -1167,6 +1242,7 @@
 	}
 
 	/* set scaler clears this on some chips */
+	/* XXX check DCE4 */
 	if (!(radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))) {
 		if (ASIC_IS_AVIVO(rdev) && (mode->flags & DRM_MODE_FLAG_INTERLACE))
 			WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset,
@@ -1183,6 +1259,33 @@
 	struct drm_encoder *test_encoder;
 	struct radeon_encoder_atom_dig *dig;
 	uint32_t dig_enc_in_use = 0;
+
+	if (ASIC_IS_DCE4(rdev)) {
+		struct radeon_connector_atom_dig *dig_connector =
+			radeon_get_atom_connector_priv_from_encoder(encoder);
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+			if (dig_connector->linkb)
+				return 1;
+			else
+				return 0;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+			if (dig_connector->linkb)
+				return 3;
+			else
+				return 2;
+			break;
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+			if (dig_connector->linkb)
+				return 5;
+			else
+				return 4;
+			break;
+		}
+	}
+
 	/* on DCE32 and encoder can driver any block so just crtc id */
 	if (ASIC_IS_DCE32(rdev)) {
 		return radeon_crtc->crtc_id;
@@ -1254,15 +1357,26 @@
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
-		/* disable the encoder and transmitter */
-		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-		atombios_dig_encoder_setup(encoder, ATOM_DISABLE);
+		if (ASIC_IS_DCE4(rdev)) {
+			/* disable the transmitter */
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+			/* setup and enable the encoder */
+			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP);
 
-		/* setup and enable the encoder and transmitter */
-		atombios_dig_encoder_setup(encoder, ATOM_ENABLE);
-		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
-		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
-		atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
+			/* init and enable the transmitter */
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
+		} else {
+			/* disable the encoder and transmitter */
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
+			atombios_dig_encoder_setup(encoder, ATOM_DISABLE);
+
+			/* setup and enable the encoder and transmitter */
+			atombios_dig_encoder_setup(encoder, ATOM_ENABLE);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
+		}
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
 		atombios_ddia_setup(encoder, ATOM_ENABLE);
@@ -1282,7 +1396,9 @@
 	}
 	atombios_apply_encoder_quirks(encoder, adjusted_mode);
 
-	r600_hdmi_setmode(encoder, adjusted_mode);
+	/* XXX */
+	if (!ASIC_IS_DCE4(rdev))
+		r600_hdmi_setmode(encoder, adjusted_mode);
 }
 
 static bool
@@ -1480,10 +1596,18 @@
 		return;
 
 	encoder = &radeon_encoder->base;
-	if (rdev->flags & RADEON_SINGLE_CRTC)
+	switch (rdev->num_crtc) {
+	case 1:
 		encoder->possible_crtcs = 0x1;
-	else
+		break;
+	case 2:
+	default:
 		encoder->possible_crtcs = 0x3;
+		break;
+	case 6:
+		encoder->possible_crtcs = 0x3f;
+		break;
+	}
 
 	radeon_encoder->enc_priv = NULL;
 
diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h
index 797972e..93c7d5d 100644
--- a/drivers/gpu/drm/radeon/radeon_family.h
+++ b/drivers/gpu/drm/radeon/radeon_family.h
@@ -75,6 +75,11 @@
 	CHIP_RV730,
 	CHIP_RV710,
 	CHIP_RV740,
+	CHIP_CEDAR,
+	CHIP_REDWOOD,
+	CHIP_JUNIPER,
+	CHIP_CYPRESS,
+	CHIP_HEMLOCK,
 	CHIP_LAST,
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index d71e346..8fccbf2 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -39,6 +39,8 @@
 
 #include "drm_fb_helper.h"
 
+#include <linux/vga_switcheroo.h>
+
 struct radeon_fb_device {
 	struct drm_fb_helper helper;
 	struct radeon_framebuffer	*rfb;
@@ -148,7 +150,6 @@
 	unsigned long tmp;
 	bool fb_tiled = false; /* useful for testing */
 	u32 tiling_flags = 0;
-	int crtc_count;
 
 	mode_cmd.width = surface_width;
 	mode_cmd.height = surface_height;
@@ -239,11 +240,7 @@
 	rfbdev = info->par;
 	rfbdev->helper.funcs = &radeon_fb_helper_funcs;
 	rfbdev->helper.dev = dev;
-	if (rdev->flags & RADEON_SINGLE_CRTC)
-		crtc_count = 1;
-	else
-		crtc_count = 2;
-	ret = drm_fb_helper_init_crtc_count(&rfbdev->helper, crtc_count,
+	ret = drm_fb_helper_init_crtc_count(&rfbdev->helper, rdev->num_crtc,
 					    RADEONFB_CONN_LIMIT);
 	if (ret)
 		goto out_unref;
@@ -257,7 +254,7 @@
 	info->flags = FBINFO_DEFAULT;
 	info->fbops = &radeonfb_ops;
 
-	tmp = fb_gpuaddr - rdev->mc.vram_location;
+	tmp = fb_gpuaddr - rdev->mc.vram_start;
 	info->fix.smem_start = rdev->mc.aper_base + tmp;
 	info->fix.smem_len = size;
 	info->screen_base = fbptr;
@@ -291,6 +288,7 @@
 	rfbdev->rdev = rdev;
 
 	mutex_unlock(&rdev->ddev->struct_mutex);
+	vga_switcheroo_client_fb_set(rdev->ddev->pdev, info);
 	return 0;
 
 out_unref:
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index e73d56e..1770d3c 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -139,6 +139,7 @@
 	unsigned t;
 	unsigned p;
 	int i, j;
+	u64 page_base;
 
 	if (!rdev->gart.ready) {
 		WARN(1, "trying to unbind memory to unitialized GART !\n");
@@ -151,9 +152,11 @@
 			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 			rdev->gart.pages[p] = NULL;
-			rdev->gart.pages_addr[p] = 0;
+			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
+			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
-				radeon_gart_set_page(rdev, t, 0);
+				radeon_gart_set_page(rdev, t, page_base);
+				page_base += RADEON_GPU_PAGE_SIZE;
 			}
 		}
 	}
@@ -199,8 +202,26 @@
 	return 0;
 }
 
+void radeon_gart_restore(struct radeon_device *rdev)
+{
+	int i, j, t;
+	u64 page_base;
+
+	for (i = 0, t = 0; i < rdev->gart.num_cpu_pages; i++) {
+		page_base = rdev->gart.pages_addr[i];
+		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+			radeon_gart_set_page(rdev, t, page_base);
+			page_base += RADEON_GPU_PAGE_SIZE;
+		}
+	}
+	mb();
+	radeon_gart_tlb_flush(rdev);
+}
+
 int radeon_gart_init(struct radeon_device *rdev)
 {
+	int r, i;
+
 	if (rdev->gart.pages) {
 		return 0;
 	}
@@ -209,6 +230,9 @@
 		DRM_ERROR("Page size is smaller than GPU page size!\n");
 		return -EINVAL;
 	}
+	r = radeon_dummy_page_init(rdev);
+	if (r)
+		return r;
 	/* Compute table size */
 	rdev->gart.num_cpu_pages = rdev->mc.gtt_size / PAGE_SIZE;
 	rdev->gart.num_gpu_pages = rdev->mc.gtt_size / RADEON_GPU_PAGE_SIZE;
@@ -227,6 +251,10 @@
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
+	/* set GART entry to point to the dummy page by default */
+	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
+		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index db8e9a3..ef92d14 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -69,9 +69,7 @@
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to allocate GEM object (%d, %d, %u, %d)\n",
 				  size, initial_domain, alignment, r);
-		mutex_lock(&rdev->ddev->struct_mutex);
-		drm_gem_object_unreference(gobj);
-		mutex_unlock(&rdev->ddev->struct_mutex);
+		drm_gem_object_unreference_unlocked(gobj);
 		return r;
 	}
 	gobj->driver_private = robj;
@@ -202,14 +200,10 @@
 	}
 	r = drm_gem_handle_create(filp, gobj, &handle);
 	if (r) {
-		mutex_lock(&dev->struct_mutex);
-		drm_gem_object_unreference(gobj);
-		mutex_unlock(&dev->struct_mutex);
+		drm_gem_object_unreference_unlocked(gobj);
 		return r;
 	}
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_handle_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_handle_unreference_unlocked(gobj);
 	args->handle = handle;
 	return 0;
 }
@@ -236,9 +230,7 @@
 
 	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
 
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
 
@@ -255,9 +247,7 @@
 	}
 	robj = gobj->driver_private;
 	args->addr_ptr = radeon_bo_mmap_offset(robj);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
 }
 
@@ -288,9 +278,7 @@
 	default:
 		break;
 	}
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
 
@@ -311,9 +299,7 @@
 	/* callback hw specific functions if any */
 	if (robj->rdev->asic->ioctl_wait_idle)
 		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
 
@@ -331,9 +317,7 @@
 		return -EINVAL;
 	robj = gobj->driver_private;
 	r = radeon_bo_set_tiling_flags(robj, args->tiling_flags, args->pitch);
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
 
@@ -356,8 +340,6 @@
 	radeon_bo_get_tiling_flags(rbo, &args->tiling_flags, &args->pitch);
 	radeon_bo_unreserve(rbo);
 out:
-	mutex_lock(&dev->struct_mutex);
-	drm_gem_object_unreference(gobj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index da3da1e..4ae50c1 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -26,6 +26,7 @@
 #include "drmP.h"
 #include "radeon_drm.h"
 #include "radeon.h"
+#include "atom.h"
 
 /**
  * radeon_ddc_probe
@@ -59,7 +60,7 @@
 }
 
 
-void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
+static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
 {
 	struct radeon_device *rdev = i2c->dev->dev_private;
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
@@ -71,13 +72,25 @@
 	 */
 	if (rec->hw_capable) {
 		if ((rdev->family >= CHIP_R200) && !ASIC_IS_AVIVO(rdev)) {
-			if (rec->a_clk_reg == RADEON_GPIO_MONID) {
+			u32 reg;
+
+			if (rdev->family >= CHIP_RV350)
+				reg = RADEON_GPIO_MONID;
+			else if ((rdev->family == CHIP_R300) ||
+				 (rdev->family == CHIP_R350))
+				reg = RADEON_GPIO_DVI_DDC;
+			else
+				reg = RADEON_GPIO_CRT2_DDC;
+
+			mutex_lock(&rdev->dc_hw_i2c_mutex);
+			if (rec->a_clk_reg == reg) {
 				WREG32(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST |
 							       R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1)));
 			} else {
 				WREG32(RADEON_DVI_I2C_CNTL_0, (RADEON_I2C_SOFT_RST |
 							       R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3)));
 			}
+			mutex_unlock(&rdev->dc_hw_i2c_mutex);
 		}
 	}
 
@@ -168,6 +181,692 @@
 	WREG32(rec->en_data_reg, val);
 }
 
+static u32 radeon_get_i2c_prescale(struct radeon_device *rdev)
+{
+	struct radeon_pll *spll = &rdev->clock.spll;
+	u32 sclk = radeon_get_engine_clock(rdev);
+	u32 prescale = 0;
+	u32 n, m;
+	u8 loop;
+	int i2c_clock;
+
+	switch (rdev->family) {
+	case CHIP_R100:
+	case CHIP_RV100:
+	case CHIP_RS100:
+	case CHIP_RV200:
+	case CHIP_RS200:
+	case CHIP_R200:
+	case CHIP_RV250:
+	case CHIP_RS300:
+	case CHIP_RV280:
+	case CHIP_R300:
+	case CHIP_R350:
+	case CHIP_RV350:
+		n = (spll->reference_freq) / (4 * 6);
+		for (loop = 1; loop < 255; loop++) {
+			if ((loop * (loop - 1)) > n)
+				break;
+		}
+		m = loop - 1;
+		prescale = m | (loop << 8);
+		break;
+	case CHIP_RV380:
+	case CHIP_RS400:
+	case CHIP_RS480:
+	case CHIP_R420:
+	case CHIP_R423:
+	case CHIP_RV410:
+		sclk = radeon_get_engine_clock(rdev);
+		prescale = (((sclk * 10)/(4 * 128 * 100) + 1) << 8) + 128;
+		break;
+	case CHIP_RS600:
+	case CHIP_RS690:
+	case CHIP_RS740:
+		/* todo */
+		break;
+	case CHIP_RV515:
+	case CHIP_R520:
+	case CHIP_RV530:
+	case CHIP_RV560:
+	case CHIP_RV570:
+	case CHIP_R580:
+		i2c_clock = 50;
+		sclk = radeon_get_engine_clock(rdev);
+		if (rdev->family == CHIP_R520)
+			prescale = (127 << 8) + ((sclk * 10) / (4 * 127 * i2c_clock));
+		else
+			prescale = (((sclk * 10)/(4 * 128 * 100) + 1) << 8) + 128;
+		break;
+	case CHIP_R600:
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+		/* todo */
+		break;
+	case CHIP_RV620:
+	case CHIP_RV635:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+		/* todo */
+		break;
+	case CHIP_CEDAR:
+	case CHIP_REDWOOD:
+	case CHIP_JUNIPER:
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		/* todo */
+		break;
+	default:
+		DRM_ERROR("i2c: unhandled radeon chip\n");
+		break;
+	}
+	return prescale;
+}
+
+
+/* hw i2c engine for r1xx-4xx hardware
+ * hw can buffer up to 15 bytes
+ */
+static int r100_hw_i2c_xfer(struct i2c_adapter *i2c_adap,
+			    struct i2c_msg *msgs, int num)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	struct i2c_msg *p;
+	int i, j, k, ret = num;
+	u32 prescale;
+	u32 i2c_cntl_0, i2c_cntl_1, i2c_data;
+	u32 tmp, reg;
+
+	mutex_lock(&rdev->dc_hw_i2c_mutex);
+	/* take the pm lock since we need a constant sclk */
+	mutex_lock(&rdev->pm.mutex);
+
+	prescale = radeon_get_i2c_prescale(rdev);
+
+	reg = ((prescale << RADEON_I2C_PRESCALE_SHIFT) |
+	       RADEON_I2C_START |
+	       RADEON_I2C_STOP |
+	       RADEON_I2C_GO);
+
+	if (rdev->is_atom_bios) {
+		tmp = RREG32(RADEON_BIOS_6_SCRATCH);
+		WREG32(RADEON_BIOS_6_SCRATCH, tmp | ATOM_S6_HW_I2C_BUSY_STATE);
+	}
+
+	if (rec->mm_i2c) {
+		i2c_cntl_0 = RADEON_I2C_CNTL_0;
+		i2c_cntl_1 = RADEON_I2C_CNTL_1;
+		i2c_data = RADEON_I2C_DATA;
+	} else {
+		i2c_cntl_0 = RADEON_DVI_I2C_CNTL_0;
+		i2c_cntl_1 = RADEON_DVI_I2C_CNTL_1;
+		i2c_data = RADEON_DVI_I2C_DATA;
+
+		switch (rdev->family) {
+		case CHIP_R100:
+		case CHIP_RV100:
+		case CHIP_RS100:
+		case CHIP_RV200:
+		case CHIP_RS200:
+		case CHIP_RS300:
+			switch (rec->mask_clk_reg) {
+			case RADEON_GPIO_DVI_DDC:
+				/* no gpio select bit */
+				break;
+			default:
+				DRM_ERROR("gpio not supported with hw i2c\n");
+				ret = -EINVAL;
+				goto done;
+			}
+			break;
+		case CHIP_R200:
+			/* only bit 4 on r200 */
+			switch (rec->mask_clk_reg) {
+			case RADEON_GPIO_DVI_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1);
+				break;
+			case RADEON_GPIO_MONID:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3);
+				break;
+			default:
+				DRM_ERROR("gpio not supported with hw i2c\n");
+				ret = -EINVAL;
+				goto done;
+			}
+			break;
+		case CHIP_RV250:
+		case CHIP_RV280:
+			/* bits 3 and 4 */
+			switch (rec->mask_clk_reg) {
+			case RADEON_GPIO_DVI_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1);
+				break;
+			case RADEON_GPIO_VGA_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC2);
+				break;
+			case RADEON_GPIO_CRT2_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3);
+				break;
+			default:
+				DRM_ERROR("gpio not supported with hw i2c\n");
+				ret = -EINVAL;
+				goto done;
+			}
+			break;
+		case CHIP_R300:
+		case CHIP_R350:
+			/* only bit 4 on r300/r350 */
+			switch (rec->mask_clk_reg) {
+			case RADEON_GPIO_VGA_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1);
+				break;
+			case RADEON_GPIO_DVI_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3);
+				break;
+			default:
+				DRM_ERROR("gpio not supported with hw i2c\n");
+				ret = -EINVAL;
+				goto done;
+			}
+			break;
+		case CHIP_RV350:
+		case CHIP_RV380:
+		case CHIP_R420:
+		case CHIP_R423:
+		case CHIP_RV410:
+		case CHIP_RS400:
+		case CHIP_RS480:
+			/* bits 3 and 4 */
+			switch (rec->mask_clk_reg) {
+			case RADEON_GPIO_VGA_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC1);
+				break;
+			case RADEON_GPIO_DVI_DDC:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC2);
+				break;
+			case RADEON_GPIO_MONID:
+				reg |= R200_DVI_I2C_PIN_SEL(R200_SEL_DDC3);
+				break;
+			default:
+				DRM_ERROR("gpio not supported with hw i2c\n");
+				ret = -EINVAL;
+				goto done;
+			}
+			break;
+		default:
+			DRM_ERROR("unsupported asic\n");
+			ret = -EINVAL;
+			goto done;
+			break;
+		}
+	}
+
+	/* check for bus probe */
+	p = &msgs[0];
+	if ((num == 1) && (p->len == 0)) {
+		WREG32(i2c_cntl_0, (RADEON_I2C_DONE |
+				    RADEON_I2C_NACK |
+				    RADEON_I2C_HALT |
+				    RADEON_I2C_SOFT_RST));
+		WREG32(i2c_data, (p->addr << 1) & 0xff);
+		WREG32(i2c_data, 0);
+		WREG32(i2c_cntl_1, ((1 << RADEON_I2C_DATA_COUNT_SHIFT) |
+				    (1 << RADEON_I2C_ADDR_COUNT_SHIFT) |
+				    RADEON_I2C_EN |
+				    (48 << RADEON_I2C_TIME_LIMIT_SHIFT)));
+		WREG32(i2c_cntl_0, reg);
+		for (k = 0; k < 32; k++) {
+			udelay(10);
+			tmp = RREG32(i2c_cntl_0);
+			if (tmp & RADEON_I2C_GO)
+				continue;
+			tmp = RREG32(i2c_cntl_0);
+			if (tmp & RADEON_I2C_DONE)
+				break;
+			else {
+				DRM_DEBUG("i2c write error 0x%08x\n", tmp);
+				WREG32(i2c_cntl_0, tmp | RADEON_I2C_ABORT);
+				ret = -EIO;
+				goto done;
+			}
+		}
+		goto done;
+	}
+
+	for (i = 0; i < num; i++) {
+		p = &msgs[i];
+		for (j = 0; j < p->len; j++) {
+			if (p->flags & I2C_M_RD) {
+				WREG32(i2c_cntl_0, (RADEON_I2C_DONE |
+						    RADEON_I2C_NACK |
+						    RADEON_I2C_HALT |
+						    RADEON_I2C_SOFT_RST));
+				WREG32(i2c_data, ((p->addr << 1) & 0xff) | 0x1);
+				WREG32(i2c_cntl_1, ((1 << RADEON_I2C_DATA_COUNT_SHIFT) |
+						    (1 << RADEON_I2C_ADDR_COUNT_SHIFT) |
+						    RADEON_I2C_EN |
+						    (48 << RADEON_I2C_TIME_LIMIT_SHIFT)));
+				WREG32(i2c_cntl_0, reg | RADEON_I2C_RECEIVE);
+				for (k = 0; k < 32; k++) {
+					udelay(10);
+					tmp = RREG32(i2c_cntl_0);
+					if (tmp & RADEON_I2C_GO)
+						continue;
+					tmp = RREG32(i2c_cntl_0);
+					if (tmp & RADEON_I2C_DONE)
+						break;
+					else {
+						DRM_DEBUG("i2c read error 0x%08x\n", tmp);
+						WREG32(i2c_cntl_0, tmp | RADEON_I2C_ABORT);
+						ret = -EIO;
+						goto done;
+					}
+				}
+				p->buf[j] = RREG32(i2c_data) & 0xff;
+			} else {
+				WREG32(i2c_cntl_0, (RADEON_I2C_DONE |
+						    RADEON_I2C_NACK |
+						    RADEON_I2C_HALT |
+						    RADEON_I2C_SOFT_RST));
+				WREG32(i2c_data, (p->addr << 1) & 0xff);
+				WREG32(i2c_data, p->buf[j]);
+				WREG32(i2c_cntl_1, ((1 << RADEON_I2C_DATA_COUNT_SHIFT) |
+						    (1 << RADEON_I2C_ADDR_COUNT_SHIFT) |
+						    RADEON_I2C_EN |
+						    (48 << RADEON_I2C_TIME_LIMIT_SHIFT)));
+				WREG32(i2c_cntl_0, reg);
+				for (k = 0; k < 32; k++) {
+					udelay(10);
+					tmp = RREG32(i2c_cntl_0);
+					if (tmp & RADEON_I2C_GO)
+						continue;
+					tmp = RREG32(i2c_cntl_0);
+					if (tmp & RADEON_I2C_DONE)
+						break;
+					else {
+						DRM_DEBUG("i2c write error 0x%08x\n", tmp);
+						WREG32(i2c_cntl_0, tmp | RADEON_I2C_ABORT);
+						ret = -EIO;
+						goto done;
+					}
+				}
+			}
+		}
+	}
+
+done:
+	WREG32(i2c_cntl_0, 0);
+	WREG32(i2c_cntl_1, 0);
+	WREG32(i2c_cntl_0, (RADEON_I2C_DONE |
+			    RADEON_I2C_NACK |
+			    RADEON_I2C_HALT |
+			    RADEON_I2C_SOFT_RST));
+
+	if (rdev->is_atom_bios) {
+		tmp = RREG32(RADEON_BIOS_6_SCRATCH);
+		tmp &= ~ATOM_S6_HW_I2C_BUSY_STATE;
+		WREG32(RADEON_BIOS_6_SCRATCH, tmp);
+	}
+
+	mutex_unlock(&rdev->pm.mutex);
+	mutex_unlock(&rdev->dc_hw_i2c_mutex);
+
+	return ret;
+}
+
+/* hw i2c engine for r5xx hardware
+ * hw can buffer up to 15 bytes
+ */
+static int r500_hw_i2c_xfer(struct i2c_adapter *i2c_adap,
+			    struct i2c_msg *msgs, int num)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	struct i2c_msg *p;
+	int i, j, remaining, current_count, buffer_offset, ret = num;
+	u32 prescale;
+	u32 tmp, reg;
+	u32 saved1, saved2;
+
+	mutex_lock(&rdev->dc_hw_i2c_mutex);
+	/* take the pm lock since we need a constant sclk */
+	mutex_lock(&rdev->pm.mutex);
+
+	prescale = radeon_get_i2c_prescale(rdev);
+
+	/* clear gpio mask bits */
+	tmp = RREG32(rec->mask_clk_reg);
+	tmp &= ~rec->mask_clk_mask;
+	WREG32(rec->mask_clk_reg, tmp);
+	tmp = RREG32(rec->mask_clk_reg);
+
+	tmp = RREG32(rec->mask_data_reg);
+	tmp &= ~rec->mask_data_mask;
+	WREG32(rec->mask_data_reg, tmp);
+	tmp = RREG32(rec->mask_data_reg);
+
+	/* clear pin values */
+	tmp = RREG32(rec->a_clk_reg);
+	tmp &= ~rec->a_clk_mask;
+	WREG32(rec->a_clk_reg, tmp);
+	tmp = RREG32(rec->a_clk_reg);
+
+	tmp = RREG32(rec->a_data_reg);
+	tmp &= ~rec->a_data_mask;
+	WREG32(rec->a_data_reg, tmp);
+	tmp = RREG32(rec->a_data_reg);
+
+	/* set the pins to input */
+	tmp = RREG32(rec->en_clk_reg);
+	tmp &= ~rec->en_clk_mask;
+	WREG32(rec->en_clk_reg, tmp);
+	tmp = RREG32(rec->en_clk_reg);
+
+	tmp = RREG32(rec->en_data_reg);
+	tmp &= ~rec->en_data_mask;
+	WREG32(rec->en_data_reg, tmp);
+	tmp = RREG32(rec->en_data_reg);
+
+	/* */
+	tmp = RREG32(RADEON_BIOS_6_SCRATCH);
+	WREG32(RADEON_BIOS_6_SCRATCH, tmp | ATOM_S6_HW_I2C_BUSY_STATE);
+	saved1 = RREG32(AVIVO_DC_I2C_CONTROL1);
+	saved2 = RREG32(0x494);
+	WREG32(0x494, saved2 | 0x1);
+
+	WREG32(AVIVO_DC_I2C_ARBITRATION, AVIVO_DC_I2C_SW_WANTS_TO_USE_I2C);
+	for (i = 0; i < 50; i++) {
+		udelay(1);
+		if (RREG32(AVIVO_DC_I2C_ARBITRATION) & AVIVO_DC_I2C_SW_CAN_USE_I2C)
+			break;
+	}
+	if (i == 50) {
+		DRM_ERROR("failed to get i2c bus\n");
+		ret = -EBUSY;
+		goto done;
+	}
+
+	reg = AVIVO_DC_I2C_START | AVIVO_DC_I2C_STOP | AVIVO_DC_I2C_EN;
+	switch (rec->mask_clk_reg) {
+	case AVIVO_DC_GPIO_DDC1_MASK:
+		reg |= AVIVO_DC_I2C_PIN_SELECT(AVIVO_SEL_DDC1);
+		break;
+	case AVIVO_DC_GPIO_DDC2_MASK:
+		reg |= AVIVO_DC_I2C_PIN_SELECT(AVIVO_SEL_DDC2);
+		break;
+	case AVIVO_DC_GPIO_DDC3_MASK:
+		reg |= AVIVO_DC_I2C_PIN_SELECT(AVIVO_SEL_DDC3);
+		break;
+	default:
+		DRM_ERROR("gpio not supported with hw i2c\n");
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* check for bus probe */
+	p = &msgs[0];
+	if ((num == 1) && (p->len == 0)) {
+		WREG32(AVIVO_DC_I2C_STATUS1, (AVIVO_DC_I2C_DONE |
+					      AVIVO_DC_I2C_NACK |
+					      AVIVO_DC_I2C_HALT));
+		WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_SOFT_RESET);
+		udelay(1);
+		WREG32(AVIVO_DC_I2C_RESET, 0);
+
+		WREG32(AVIVO_DC_I2C_DATA, (p->addr << 1) & 0xff);
+		WREG32(AVIVO_DC_I2C_DATA, 0);
+
+		WREG32(AVIVO_DC_I2C_CONTROL3, AVIVO_DC_I2C_TIME_LIMIT(48));
+		WREG32(AVIVO_DC_I2C_CONTROL2, (AVIVO_DC_I2C_ADDR_COUNT(1) |
+					       AVIVO_DC_I2C_DATA_COUNT(1) |
+					       (prescale << 16)));
+		WREG32(AVIVO_DC_I2C_CONTROL1, reg);
+		WREG32(AVIVO_DC_I2C_STATUS1, AVIVO_DC_I2C_GO);
+		for (j = 0; j < 200; j++) {
+			udelay(50);
+			tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+			if (tmp & AVIVO_DC_I2C_GO)
+				continue;
+			tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+			if (tmp & AVIVO_DC_I2C_DONE)
+				break;
+			else {
+				DRM_DEBUG("i2c write error 0x%08x\n", tmp);
+				WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_ABORT);
+				ret = -EIO;
+				goto done;
+			}
+		}
+		goto done;
+	}
+
+	for (i = 0; i < num; i++) {
+		p = &msgs[i];
+		remaining = p->len;
+		buffer_offset = 0;
+		if (p->flags & I2C_M_RD) {
+			while (remaining) {
+				if (remaining > 15)
+					current_count = 15;
+				else
+					current_count = remaining;
+				WREG32(AVIVO_DC_I2C_STATUS1, (AVIVO_DC_I2C_DONE |
+							      AVIVO_DC_I2C_NACK |
+							      AVIVO_DC_I2C_HALT));
+				WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_SOFT_RESET);
+				udelay(1);
+				WREG32(AVIVO_DC_I2C_RESET, 0);
+
+				WREG32(AVIVO_DC_I2C_DATA, ((p->addr << 1) & 0xff) | 0x1);
+				WREG32(AVIVO_DC_I2C_CONTROL3, AVIVO_DC_I2C_TIME_LIMIT(48));
+				WREG32(AVIVO_DC_I2C_CONTROL2, (AVIVO_DC_I2C_ADDR_COUNT(1) |
+							       AVIVO_DC_I2C_DATA_COUNT(current_count) |
+							       (prescale << 16)));
+				WREG32(AVIVO_DC_I2C_CONTROL1, reg | AVIVO_DC_I2C_RECEIVE);
+				WREG32(AVIVO_DC_I2C_STATUS1, AVIVO_DC_I2C_GO);
+				for (j = 0; j < 200; j++) {
+					udelay(50);
+					tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+					if (tmp & AVIVO_DC_I2C_GO)
+						continue;
+					tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+					if (tmp & AVIVO_DC_I2C_DONE)
+						break;
+					else {
+						DRM_DEBUG("i2c read error 0x%08x\n", tmp);
+						WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_ABORT);
+						ret = -EIO;
+						goto done;
+					}
+				}
+				for (j = 0; j < current_count; j++)
+					p->buf[buffer_offset + j] = RREG32(AVIVO_DC_I2C_DATA) & 0xff;
+				remaining -= current_count;
+				buffer_offset += current_count;
+			}
+		} else {
+			while (remaining) {
+				if (remaining > 15)
+					current_count = 15;
+				else
+					current_count = remaining;
+				WREG32(AVIVO_DC_I2C_STATUS1, (AVIVO_DC_I2C_DONE |
+							      AVIVO_DC_I2C_NACK |
+							      AVIVO_DC_I2C_HALT));
+				WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_SOFT_RESET);
+				udelay(1);
+				WREG32(AVIVO_DC_I2C_RESET, 0);
+
+				WREG32(AVIVO_DC_I2C_DATA, (p->addr << 1) & 0xff);
+				for (j = 0; j < current_count; j++)
+					WREG32(AVIVO_DC_I2C_DATA, p->buf[buffer_offset + j]);
+
+				WREG32(AVIVO_DC_I2C_CONTROL3, AVIVO_DC_I2C_TIME_LIMIT(48));
+				WREG32(AVIVO_DC_I2C_CONTROL2, (AVIVO_DC_I2C_ADDR_COUNT(1) |
+							       AVIVO_DC_I2C_DATA_COUNT(current_count) |
+							       (prescale << 16)));
+				WREG32(AVIVO_DC_I2C_CONTROL1, reg);
+				WREG32(AVIVO_DC_I2C_STATUS1, AVIVO_DC_I2C_GO);
+				for (j = 0; j < 200; j++) {
+					udelay(50);
+					tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+					if (tmp & AVIVO_DC_I2C_GO)
+						continue;
+					tmp = RREG32(AVIVO_DC_I2C_STATUS1);
+					if (tmp & AVIVO_DC_I2C_DONE)
+						break;
+					else {
+						DRM_DEBUG("i2c write error 0x%08x\n", tmp);
+						WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_ABORT);
+						ret = -EIO;
+						goto done;
+					}
+				}
+				remaining -= current_count;
+				buffer_offset += current_count;
+			}
+		}
+	}
+
+done:
+	WREG32(AVIVO_DC_I2C_STATUS1, (AVIVO_DC_I2C_DONE |
+				      AVIVO_DC_I2C_NACK |
+				      AVIVO_DC_I2C_HALT));
+	WREG32(AVIVO_DC_I2C_RESET, AVIVO_DC_I2C_SOFT_RESET);
+	udelay(1);
+	WREG32(AVIVO_DC_I2C_RESET, 0);
+
+	WREG32(AVIVO_DC_I2C_ARBITRATION, AVIVO_DC_I2C_SW_DONE_USING_I2C);
+	WREG32(AVIVO_DC_I2C_CONTROL1, saved1);
+	WREG32(0x494, saved2);
+	tmp = RREG32(RADEON_BIOS_6_SCRATCH);
+	tmp &= ~ATOM_S6_HW_I2C_BUSY_STATE;
+	WREG32(RADEON_BIOS_6_SCRATCH, tmp);
+
+	mutex_unlock(&rdev->pm.mutex);
+	mutex_unlock(&rdev->dc_hw_i2c_mutex);
+
+	return ret;
+}
+
+static int radeon_sw_i2c_xfer(struct i2c_adapter *i2c_adap,
+			      struct i2c_msg *msgs, int num)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	int ret;
+
+	radeon_i2c_do_lock(i2c, 1);
+	ret = i2c_transfer(&i2c->algo.radeon.bit_adapter, msgs, num);
+	radeon_i2c_do_lock(i2c, 0);
+
+	return ret;
+}
+
+static int radeon_i2c_xfer(struct i2c_adapter *i2c_adap,
+			   struct i2c_msg *msgs, int num)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	int ret;
+
+	switch (rdev->family) {
+	case CHIP_R100:
+	case CHIP_RV100:
+	case CHIP_RS100:
+	case CHIP_RV200:
+	case CHIP_RS200:
+	case CHIP_R200:
+	case CHIP_RV250:
+	case CHIP_RS300:
+	case CHIP_RV280:
+	case CHIP_R300:
+	case CHIP_R350:
+	case CHIP_RV350:
+	case CHIP_RV380:
+	case CHIP_R420:
+	case CHIP_R423:
+	case CHIP_RV410:
+	case CHIP_RS400:
+	case CHIP_RS480:
+		if (rec->hw_capable)
+			ret = r100_hw_i2c_xfer(i2c_adap, msgs, num);
+		else
+			ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	case CHIP_RS600:
+	case CHIP_RS690:
+	case CHIP_RS740:
+		/* XXX fill in hw i2c implementation */
+		ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	case CHIP_RV515:
+	case CHIP_R520:
+	case CHIP_RV530:
+	case CHIP_RV560:
+	case CHIP_RV570:
+	case CHIP_R580:
+		if (rec->hw_capable) {
+			if (rec->mm_i2c)
+				ret = r100_hw_i2c_xfer(i2c_adap, msgs, num);
+			else
+				ret = r500_hw_i2c_xfer(i2c_adap, msgs, num);
+		} else
+			ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	case CHIP_R600:
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+		/* XXX fill in hw i2c implementation */
+		ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	case CHIP_RV620:
+	case CHIP_RV635:
+	case CHIP_RS780:
+	case CHIP_RS880:
+	case CHIP_RV770:
+	case CHIP_RV730:
+	case CHIP_RV710:
+	case CHIP_RV740:
+		/* XXX fill in hw i2c implementation */
+		ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	case CHIP_CEDAR:
+	case CHIP_REDWOOD:
+	case CHIP_JUNIPER:
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		/* XXX fill in hw i2c implementation */
+		ret = radeon_sw_i2c_xfer(i2c_adap, msgs, num);
+		break;
+	default:
+		DRM_ERROR("i2c: unhandled radeon chip\n");
+		ret = -EIO;
+		break;
+	}
+
+	return ret;
+}
+
+static u32 radeon_i2c_func(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm radeon_i2c_algo = {
+	.master_xfer = radeon_i2c_xfer,
+	.functionality = radeon_i2c_func,
+};
+
 struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
 					  struct radeon_i2c_bus_rec *rec,
 					  const char *name)
@@ -179,23 +878,36 @@
 	if (i2c == NULL)
 		return NULL;
 
-	i2c->adapter.owner = THIS_MODULE;
-	i2c->dev = dev;
-	i2c_set_adapdata(&i2c->adapter, i2c);
-	i2c->adapter.algo_data = &i2c->algo.bit;
-	i2c->algo.bit.setsda = set_data;
-	i2c->algo.bit.setscl = set_clock;
-	i2c->algo.bit.getsda = get_data;
-	i2c->algo.bit.getscl = get_clock;
-	i2c->algo.bit.udelay = 20;
+	/* set the internal bit adapter */
+	i2c->algo.radeon.bit_adapter.owner = THIS_MODULE;
+	i2c_set_adapdata(&i2c->algo.radeon.bit_adapter, i2c);
+	sprintf(i2c->algo.radeon.bit_adapter.name, "Radeon internal i2c bit bus %s", name);
+	i2c->algo.radeon.bit_adapter.algo_data = &i2c->algo.radeon.bit_data;
+	i2c->algo.radeon.bit_data.setsda = set_data;
+	i2c->algo.radeon.bit_data.setscl = set_clock;
+	i2c->algo.radeon.bit_data.getsda = get_data;
+	i2c->algo.radeon.bit_data.getscl = get_clock;
+	i2c->algo.radeon.bit_data.udelay = 20;
 	/* vesa says 2.2 ms is enough, 1 jiffy doesn't seem to always
 	 * make this, 2 jiffies is a lot more reliable */
-	i2c->algo.bit.timeout = 2;
-	i2c->algo.bit.data = i2c;
-	i2c->rec = *rec;
-	ret = i2c_bit_add_bus(&i2c->adapter);
+	i2c->algo.radeon.bit_data.timeout = 2;
+	i2c->algo.radeon.bit_data.data = i2c;
+	ret = i2c_bit_add_bus(&i2c->algo.radeon.bit_adapter);
 	if (ret) {
-		DRM_INFO("Failed to register i2c %s\n", name);
+		DRM_ERROR("Failed to register internal bit i2c %s\n", name);
+		goto out_free;
+	}
+	/* set the radeon i2c adapter */
+	i2c->dev = dev;
+	i2c->rec = *rec;
+	i2c->adapter.owner = THIS_MODULE;
+	i2c_set_adapdata(&i2c->adapter, i2c);
+	sprintf(i2c->adapter.name, "Radeon i2c %s", name);
+	i2c->adapter.algo_data = &i2c->algo.radeon;
+	i2c->adapter.algo = &radeon_i2c_algo;
+	ret = i2c_add_adapter(&i2c->adapter);
+	if (ret) {
+		DRM_ERROR("Failed to register i2c %s\n", name);
 		goto out_free;
 	}
 
@@ -237,11 +949,19 @@
 
 }
 
-
 void radeon_i2c_destroy(struct radeon_i2c_chan *i2c)
 {
 	if (!i2c)
 		return;
+	i2c_del_adapter(&i2c->algo.radeon.bit_adapter);
+	i2c_del_adapter(&i2c->adapter);
+	kfree(i2c);
+}
+
+void radeon_i2c_destroy_dp(struct radeon_i2c_chan *i2c)
+{
+	if (!i2c)
+		return;
 
 	i2c_del_adapter(&i2c->adapter);
 	kfree(i2c);
@@ -252,10 +972,10 @@
 	return NULL;
 }
 
-void radeon_i2c_sw_get_byte(struct radeon_i2c_chan *i2c_bus,
-			    u8 slave_addr,
-			    u8 addr,
-			    u8 *val)
+void radeon_i2c_get_byte(struct radeon_i2c_chan *i2c_bus,
+			 u8 slave_addr,
+			 u8 addr,
+			 u8 *val)
 {
 	u8 out_buf[2];
 	u8 in_buf[2];
@@ -286,10 +1006,10 @@
 	}
 }
 
-void radeon_i2c_sw_put_byte(struct radeon_i2c_chan *i2c_bus,
-			    u8 slave_addr,
-			    u8 addr,
-			    u8 val)
+void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c_bus,
+			 u8 slave_addr,
+			 u8 addr,
+			 u8 val)
 {
 	uint8_t out_buf[2];
 	struct i2c_msg msg = {
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f23b056..20ec276 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -30,6 +30,8 @@
 #include "radeon.h"
 #include "radeon_drm.h"
 
+#include <linux/vga_switcheroo.h>
+
 int radeon_driver_unload_kms(struct drm_device *dev)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -136,6 +138,7 @@
 
 void radeon_driver_lastclose_kms(struct drm_device *dev)
 {
+	vga_switcheroo_process_delayed_switch();
 }
 
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
@@ -276,17 +279,17 @@
 	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH),
 	/* KMS */
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index b6d8081..df23d6a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -403,7 +403,7 @@
 
 	/* if scanout was in GTT this really wouldn't work */
 	/* crtc offset is from display base addr not FB location */
-	radeon_crtc->legacy_display_base_addr = rdev->mc.vram_location;
+	radeon_crtc->legacy_display_base_addr = rdev->mc.vram_start;
 
 	base -= radeon_crtc->legacy_display_base_addr;
 
@@ -582,29 +582,6 @@
 				   ? RADEON_CRTC_V_SYNC_POL
 				   : 0));
 
-	/* TODO -> Dell Server */
-	if (0) {
-		uint32_t disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
-		uint32_t tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
-		uint32_t dac2_cntl = RREG32(RADEON_DAC_CNTL2);
-		uint32_t crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
-
-		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
-		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
-
-		/* For CRT on DAC2, don't turn it on if BIOS didn't
-		   enable it, even it's detected.
-		*/
-		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
-		tv_dac_cntl &= ~((1<<2) | (3<<8) | (7<<24) | (0xff<<16));
-		tv_dac_cntl |= (0x03 | (2<<8) | (0x58<<16));
-
-		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
-		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
-		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
-		WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
-	}
-
 	if (radeon_crtc->crtc_id) {
 		uint32_t crtc2_gen_cntl;
 		uint32_t disp2_merge_cntl;
@@ -726,6 +703,10 @@
 		pll = &rdev->clock.p1pll;
 
 	pll->flags = RADEON_PLL_LEGACY;
+	if (radeon_new_pll == 1)
+		pll->algo = PLL_ALGO_NEW;
+	else
+		pll->algo = PLL_ALGO_LEGACY;
 
 	if (mode->clock > 200000) /* range limits??? */
 		pll->flags |= RADEON_PLL_PREFER_HIGH_FB_DIV;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 38e45e2..cf389ce 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -115,6 +115,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_lvds_prepare(struct drm_encoder *encoder)
@@ -214,6 +217,11 @@
 				     struct drm_display_mode *adjusted_mode)
 {
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	/* adjust pm to upcoming mode change */
+	radeon_pm_compute_clocks(rdev);
 
 	/* set the active encoder to connector routing */
 	radeon_encoder_set_active_device(encoder);
@@ -285,6 +293,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_primary_dac_prepare(struct drm_encoder *encoder)
@@ -470,6 +481,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tmds_int_prepare(struct drm_encoder *encoder)
@@ -635,6 +649,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tmds_ext_prepare(struct drm_encoder *encoder)
@@ -842,6 +859,9 @@
 		radeon_atombios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
 	else
 		radeon_combios_encoder_dpms_scratch_regs(encoder, (mode == DRM_MODE_DPMS_ON) ? true : false);
+
+	/* adjust pm to dpms change */
+	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_legacy_tv_dac_prepare(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index e81b2ae..1702b82 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -83,6 +83,8 @@
 	bool valid;
 	/* id used by atom */
 	uint8_t i2c_id;
+	/* id used by atom */
+	uint8_t hpd_id;
 	/* can be used with hw i2c engine */
 	bool hw_capable;
 	/* uses multi-media i2c engine */
@@ -113,6 +115,7 @@
 
 #define RADEON_MAX_BIOS_CONNECTOR 16
 
+/* pll flags */
 #define RADEON_PLL_USE_BIOS_DIVS        (1 << 0)
 #define RADEON_PLL_NO_ODD_POST_DIV      (1 << 1)
 #define RADEON_PLL_USE_REF_DIV          (1 << 2)
@@ -127,6 +130,12 @@
 #define RADEON_PLL_PREFER_CLOSEST_LOWER (1 << 11)
 #define RADEON_PLL_USE_POST_DIV         (1 << 12)
 
+/* pll algo */
+enum radeon_pll_algo {
+	PLL_ALGO_LEGACY,
+	PLL_ALGO_NEW
+};
+
 struct radeon_pll {
 	/* reference frequency */
 	uint32_t reference_freq;
@@ -157,6 +166,13 @@
 
 	/* pll id */
 	uint32_t id;
+	/* pll algo */
+	enum radeon_pll_algo algo;
+};
+
+struct i2c_algo_radeon_data {
+	struct i2c_adapter bit_adapter;
+	struct i2c_algo_bit_data bit_data;
 };
 
 struct radeon_i2c_chan {
@@ -164,7 +180,7 @@
 	struct drm_device *dev;
 	union {
 		struct i2c_algo_dp_aux_data dp;
-		struct i2c_algo_bit_data bit;
+		struct i2c_algo_radeon_data radeon;
 	} algo;
 	struct radeon_i2c_bus_rec rec;
 };
@@ -193,7 +209,7 @@
 	struct card_info *atom_card_info;
 	enum radeon_connector_table connector_table;
 	bool mode_config_initialized;
-	struct radeon_crtc *crtcs[2];
+	struct radeon_crtc *crtcs[6];
 	/* DVI-I properties */
 	struct drm_property *coherent_mode_property;
 	/* DAC enable load detect */
@@ -202,7 +218,8 @@
 	struct drm_property *tv_std_property;
 	/* legacy TMDS PLL detect */
 	struct drm_property *tmds_pll_property;
-
+	/* hardcoded DFP edid from BIOS */
+	struct edid *bios_hardcoded_edid;
 };
 
 #define MAX_H_CODE_TIMING_LEN 32
@@ -237,6 +254,7 @@
 	fixed20_12 vsc;
 	fixed20_12 hsc;
 	struct drm_display_mode native_mode;
+	int pll_id;
 };
 
 struct radeon_encoder_primary_dac {
@@ -303,6 +321,7 @@
 	/* atom lvds */
 	uint32_t lvds_misc;
 	uint16_t panel_pwr_delay;
+	enum radeon_pll_algo pll_algo;
 	struct radeon_atom_ss *ss;
 	/* panel mode */
 	struct drm_display_mode native_mode;
@@ -398,6 +417,7 @@
 			  struct drm_connector *connector);
 extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector);
 extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
+extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action);
 extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder,
 					   int action, uint8_t lane_num,
 					   uint8_t lane_set);
@@ -411,14 +431,15 @@
 						 struct radeon_i2c_bus_rec *rec,
 						 const char *name);
 extern void radeon_i2c_destroy(struct radeon_i2c_chan *i2c);
-extern void radeon_i2c_sw_get_byte(struct radeon_i2c_chan *i2c_bus,
-				   u8 slave_addr,
-				   u8 addr,
-				   u8 *val);
-extern void radeon_i2c_sw_put_byte(struct radeon_i2c_chan *i2c,
-				   u8 slave_addr,
-				   u8 addr,
-				   u8 val);
+extern void radeon_i2c_destroy_dp(struct radeon_i2c_chan *i2c);
+extern void radeon_i2c_get_byte(struct radeon_i2c_chan *i2c_bus,
+				u8 slave_addr,
+				u8 addr,
+				u8 *val);
+extern void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c,
+				u8 slave_addr,
+				u8 addr,
+				u8 val);
 extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
 extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
 
@@ -432,14 +453,6 @@
 			       uint32_t *ref_div_p,
 			       uint32_t *post_div_p);
 
-extern void radeon_compute_pll_avivo(struct radeon_pll *pll,
-				     uint64_t freq,
-				     uint32_t *dot_clock_p,
-				     uint32_t *fb_div_p,
-				     uint32_t *frac_fb_div_p,
-				     uint32_t *ref_div_p,
-				     uint32_t *post_div_p);
-
 extern void radeon_setup_encoder_clones(struct drm_device *dev);
 
 struct drm_encoder *radeon_encoder_legacy_lvds_add(struct drm_device *dev, int bios_index);
@@ -473,6 +486,9 @@
 extern int radeon_crtc_cursor_move(struct drm_crtc *crtc,
 				   int x, int y);
 
+extern bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev);
+extern struct edid *
+radeon_combios_get_hardcoded_edid(struct radeon_device *rdev);
 extern bool radeon_atom_get_clock_info(struct drm_device *dev);
 extern bool radeon_combios_get_clock_info(struct drm_device *dev);
 extern struct radeon_encoder_atom_dig *
@@ -531,7 +547,6 @@
 			       struct radeon_crtc *radeon_crtc);
 void radeon_legacy_init_crtc(struct drm_device *dev,
 			     struct radeon_crtc *radeon_crtc);
-extern void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state);
 
 void radeon_get_clock_info(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index f1da370..fc9d00a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -178,7 +178,6 @@
 {
 	int r, i;
 
-	radeon_ttm_placement_from_domain(bo, domain);
 	if (bo->pin_count) {
 		bo->pin_count++;
 		if (gpu_addr)
@@ -186,6 +185,8 @@
 		return 0;
 	}
 	radeon_ttm_placement_from_domain(bo, domain);
+	/* force to pin into visible video ram */
+	bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < bo->placement.num_placement; i++)
 		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 8bce64c..d4d1c39 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -18,21 +18,413 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: Rafał Miłecki <zajec5@gmail.com>
+ *          Alex Deucher <alexdeucher@gmail.com>
  */
 #include "drmP.h"
 #include "radeon.h"
+#include "avivod.h"
 
-int radeon_debugfs_pm_init(struct radeon_device *rdev);
+#define RADEON_IDLE_LOOP_MS 100
+#define RADEON_RECLOCK_DELAY_MS 200
+#define RADEON_WAIT_VBLANK_TIMEOUT 200
+
+static void radeon_pm_set_clocks_locked(struct radeon_device *rdev);
+static void radeon_pm_set_clocks(struct radeon_device *rdev);
+static void radeon_pm_idle_work_handler(struct work_struct *work);
+static int radeon_debugfs_pm_init(struct radeon_device *rdev);
+
+static const char *pm_state_names[4] = {
+	"PM_STATE_DISABLED",
+	"PM_STATE_MINIMUM",
+	"PM_STATE_PAUSED",
+	"PM_STATE_ACTIVE"
+};
+
+static const char *pm_state_types[5] = {
+	"Default",
+	"Powersave",
+	"Battery",
+	"Balanced",
+	"Performance",
+};
+
+static void radeon_print_power_mode_info(struct radeon_device *rdev)
+{
+	int i, j;
+	bool is_default;
+
+	DRM_INFO("%d Power State(s)\n", rdev->pm.num_power_states);
+	for (i = 0; i < rdev->pm.num_power_states; i++) {
+		if (rdev->pm.default_power_state == &rdev->pm.power_state[i])
+			is_default = true;
+		else
+			is_default = false;
+		DRM_INFO("State %d %s %s\n", i,
+			 pm_state_types[rdev->pm.power_state[i].type],
+			 is_default ? "(default)" : "");
+		if ((rdev->flags & RADEON_IS_PCIE) && !(rdev->flags & RADEON_IS_IGP))
+			DRM_INFO("\t%d PCIE Lanes\n", rdev->pm.power_state[i].non_clock_info.pcie_lanes);
+		DRM_INFO("\t%d Clock Mode(s)\n", rdev->pm.power_state[i].num_clock_modes);
+		for (j = 0; j < rdev->pm.power_state[i].num_clock_modes; j++) {
+			if (rdev->flags & RADEON_IS_IGP)
+				DRM_INFO("\t\t%d engine: %d\n",
+					 j,
+					 rdev->pm.power_state[i].clock_info[j].sclk * 10);
+			else
+				DRM_INFO("\t\t%d engine/memory: %d/%d\n",
+					 j,
+					 rdev->pm.power_state[i].clock_info[j].sclk * 10,
+					 rdev->pm.power_state[i].clock_info[j].mclk * 10);
+		}
+	}
+}
+
+static struct radeon_power_state * radeon_pick_power_state(struct radeon_device *rdev,
+							   enum radeon_pm_state_type type)
+{
+	int i, j;
+	enum radeon_pm_state_type wanted_types[2];
+	int wanted_count;
+
+	switch (type) {
+	case POWER_STATE_TYPE_DEFAULT:
+	default:
+		return rdev->pm.default_power_state;
+	case POWER_STATE_TYPE_POWERSAVE:
+		if (rdev->flags & RADEON_IS_MOBILITY) {
+			wanted_types[0] = POWER_STATE_TYPE_POWERSAVE;
+			wanted_types[1] = POWER_STATE_TYPE_BATTERY;
+			wanted_count = 2;
+		} else {
+			wanted_types[0] = POWER_STATE_TYPE_PERFORMANCE;
+			wanted_count = 1;
+		}
+		break;
+	case POWER_STATE_TYPE_BATTERY:
+		if (rdev->flags & RADEON_IS_MOBILITY) {
+			wanted_types[0] = POWER_STATE_TYPE_BATTERY;
+			wanted_types[1] = POWER_STATE_TYPE_POWERSAVE;
+			wanted_count = 2;
+		} else {
+			wanted_types[0] = POWER_STATE_TYPE_PERFORMANCE;
+			wanted_count = 1;
+		}
+		break;
+	case POWER_STATE_TYPE_BALANCED:
+	case POWER_STATE_TYPE_PERFORMANCE:
+		wanted_types[0] = type;
+		wanted_count = 1;
+		break;
+	}
+
+	for (i = 0; i < wanted_count; i++) {
+		for (j = 0; j < rdev->pm.num_power_states; j++) {
+			if (rdev->pm.power_state[j].type == wanted_types[i])
+				return &rdev->pm.power_state[j];
+		}
+	}
+
+	return rdev->pm.default_power_state;
+}
+
+static struct radeon_pm_clock_info * radeon_pick_clock_mode(struct radeon_device *rdev,
+							    struct radeon_power_state *power_state,
+							    enum radeon_pm_clock_mode_type type)
+{
+	switch (type) {
+	case POWER_MODE_TYPE_DEFAULT:
+	default:
+		return power_state->default_clock_mode;
+	case POWER_MODE_TYPE_LOW:
+		return &power_state->clock_info[0];
+	case POWER_MODE_TYPE_MID:
+		if (power_state->num_clock_modes > 2)
+			return &power_state->clock_info[1];
+		else
+			return &power_state->clock_info[0];
+		break;
+	case POWER_MODE_TYPE_HIGH:
+		return &power_state->clock_info[power_state->num_clock_modes - 1];
+	}
+
+}
+
+static void radeon_get_power_state(struct radeon_device *rdev,
+				   enum radeon_pm_action action)
+{
+	switch (action) {
+	case PM_ACTION_MINIMUM:
+		rdev->pm.requested_power_state = radeon_pick_power_state(rdev, POWER_STATE_TYPE_BATTERY);
+		rdev->pm.requested_clock_mode =
+			radeon_pick_clock_mode(rdev, rdev->pm.requested_power_state, POWER_MODE_TYPE_LOW);
+		break;
+	case PM_ACTION_DOWNCLOCK:
+		rdev->pm.requested_power_state = radeon_pick_power_state(rdev, POWER_STATE_TYPE_POWERSAVE);
+		rdev->pm.requested_clock_mode =
+			radeon_pick_clock_mode(rdev, rdev->pm.requested_power_state, POWER_MODE_TYPE_MID);
+		break;
+	case PM_ACTION_UPCLOCK:
+		rdev->pm.requested_power_state = radeon_pick_power_state(rdev, POWER_STATE_TYPE_DEFAULT);
+		rdev->pm.requested_clock_mode =
+			radeon_pick_clock_mode(rdev, rdev->pm.requested_power_state, POWER_MODE_TYPE_HIGH);
+		break;
+	case PM_ACTION_NONE:
+	default:
+		DRM_ERROR("Requested mode for not defined action\n");
+		return;
+	}
+	DRM_INFO("Requested: e: %d m: %d p: %d\n",
+		 rdev->pm.requested_clock_mode->sclk,
+		 rdev->pm.requested_clock_mode->mclk,
+		 rdev->pm.requested_power_state->non_clock_info.pcie_lanes);
+}
+
+static void radeon_set_power_state(struct radeon_device *rdev)
+{
+	/* if *_clock_mode are the same, *_power_state are as well */
+	if (rdev->pm.requested_clock_mode == rdev->pm.current_clock_mode)
+		return;
+
+	DRM_INFO("Setting: e: %d m: %d p: %d\n",
+		 rdev->pm.requested_clock_mode->sclk,
+		 rdev->pm.requested_clock_mode->mclk,
+		 rdev->pm.requested_power_state->non_clock_info.pcie_lanes);
+	/* set pcie lanes */
+	/* set voltage */
+	/* set engine clock */
+	radeon_set_engine_clock(rdev, rdev->pm.requested_clock_mode->sclk);
+	/* set memory clock */
+
+	rdev->pm.current_power_state = rdev->pm.requested_power_state;
+	rdev->pm.current_clock_mode = rdev->pm.requested_clock_mode;
+}
 
 int radeon_pm_init(struct radeon_device *rdev)
 {
+	rdev->pm.state = PM_STATE_DISABLED;
+	rdev->pm.planned_action = PM_ACTION_NONE;
+	rdev->pm.downclocked = false;
+
+	if (rdev->bios) {
+		if (rdev->is_atom_bios)
+			radeon_atombios_get_power_modes(rdev);
+		else
+			radeon_combios_get_power_modes(rdev);
+		radeon_print_power_mode_info(rdev);
+	}
+
 	if (radeon_debugfs_pm_init(rdev)) {
 		DRM_ERROR("Failed to register debugfs file for PM!\n");
 	}
 
+	INIT_DELAYED_WORK(&rdev->pm.idle_work, radeon_pm_idle_work_handler);
+
+	if (radeon_dynpm != -1 && radeon_dynpm) {
+		rdev->pm.state = PM_STATE_PAUSED;
+		DRM_INFO("radeon: dynamic power management enabled\n");
+	}
+
+	DRM_INFO("radeon: power management initialized\n");
+
 	return 0;
 }
 
+void radeon_pm_compute_clocks(struct radeon_device *rdev)
+{
+	struct drm_device *ddev = rdev->ddev;
+	struct drm_connector *connector;
+	struct radeon_crtc *radeon_crtc;
+	int count = 0;
+
+	if (rdev->pm.state == PM_STATE_DISABLED)
+		return;
+
+	mutex_lock(&rdev->pm.mutex);
+
+	rdev->pm.active_crtcs = 0;
+	list_for_each_entry(connector,
+		&ddev->mode_config.connector_list, head) {
+		if (connector->encoder &&
+			connector->dpms != DRM_MODE_DPMS_OFF) {
+			radeon_crtc = to_radeon_crtc(connector->encoder->crtc);
+			rdev->pm.active_crtcs |= (1 << radeon_crtc->crtc_id);
+			++count;
+		}
+	}
+
+	if (count > 1) {
+		if (rdev->pm.state == PM_STATE_ACTIVE) {
+			cancel_delayed_work(&rdev->pm.idle_work);
+
+			rdev->pm.state = PM_STATE_PAUSED;
+			rdev->pm.planned_action = PM_ACTION_UPCLOCK;
+			if (rdev->pm.downclocked)
+				radeon_pm_set_clocks(rdev);
+
+			DRM_DEBUG("radeon: dynamic power management deactivated\n");
+		}
+	} else if (count == 1) {
+		/* TODO: Increase clocks if needed for current mode */
+
+		if (rdev->pm.state == PM_STATE_MINIMUM) {
+			rdev->pm.state = PM_STATE_ACTIVE;
+			rdev->pm.planned_action = PM_ACTION_UPCLOCK;
+			radeon_pm_set_clocks(rdev);
+
+			queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+				msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+		}
+		else if (rdev->pm.state == PM_STATE_PAUSED) {
+			rdev->pm.state = PM_STATE_ACTIVE;
+			queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+				msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+			DRM_DEBUG("radeon: dynamic power management activated\n");
+		}
+	}
+	else { /* count == 0 */
+		if (rdev->pm.state != PM_STATE_MINIMUM) {
+			cancel_delayed_work(&rdev->pm.idle_work);
+
+			rdev->pm.state = PM_STATE_MINIMUM;
+			rdev->pm.planned_action = PM_ACTION_MINIMUM;
+			radeon_pm_set_clocks(rdev);
+		}
+	}
+
+	mutex_unlock(&rdev->pm.mutex);
+}
+
+static bool radeon_pm_debug_check_in_vbl(struct radeon_device *rdev, bool finish)
+{
+	u32 stat_crtc1 = 0, stat_crtc2 = 0;
+	bool in_vbl = true;
+
+	if (ASIC_IS_AVIVO(rdev)) {
+		if (rdev->pm.active_crtcs & (1 << 0)) {
+			stat_crtc1 = RREG32(D1CRTC_STATUS);
+			if (!(stat_crtc1 & 1))
+				in_vbl = false;
+		}
+		if (rdev->pm.active_crtcs & (1 << 1)) {
+			stat_crtc2 = RREG32(D2CRTC_STATUS);
+			if (!(stat_crtc2 & 1))
+				in_vbl = false;
+		}
+	}
+	if (in_vbl == false)
+		DRM_INFO("not in vbl for pm change %08x %08x at %s\n", stat_crtc1,
+			 stat_crtc2, finish ? "exit" : "entry");
+	return in_vbl;
+}
+static void radeon_pm_set_clocks_locked(struct radeon_device *rdev)
+{
+	/*radeon_fence_wait_last(rdev);*/
+	switch (rdev->pm.planned_action) {
+	case PM_ACTION_UPCLOCK:
+		rdev->pm.downclocked = false;
+		break;
+	case PM_ACTION_DOWNCLOCK:
+		rdev->pm.downclocked = true;
+		break;
+	case PM_ACTION_MINIMUM:
+		break;
+	case PM_ACTION_NONE:
+		DRM_ERROR("%s: PM_ACTION_NONE\n", __func__);
+		break;
+	}
+
+	/* check if we are in vblank */
+	radeon_pm_debug_check_in_vbl(rdev, false);
+	radeon_set_power_state(rdev);
+	radeon_pm_debug_check_in_vbl(rdev, true);
+	rdev->pm.planned_action = PM_ACTION_NONE;
+}
+
+static void radeon_pm_set_clocks(struct radeon_device *rdev)
+{
+	radeon_get_power_state(rdev, rdev->pm.planned_action);
+	mutex_lock(&rdev->cp.mutex);
+
+	if (rdev->pm.active_crtcs & (1 << 0)) {
+		rdev->pm.req_vblank |= (1 << 0);
+		drm_vblank_get(rdev->ddev, 0);
+	}
+	if (rdev->pm.active_crtcs & (1 << 1)) {
+		rdev->pm.req_vblank |= (1 << 1);
+		drm_vblank_get(rdev->ddev, 1);
+	}
+	if (rdev->pm.active_crtcs)
+		wait_event_interruptible_timeout(
+			rdev->irq.vblank_queue, 0,
+			msecs_to_jiffies(RADEON_WAIT_VBLANK_TIMEOUT));
+	if (rdev->pm.req_vblank & (1 << 0)) {
+		rdev->pm.req_vblank &= ~(1 << 0);
+		drm_vblank_put(rdev->ddev, 0);
+	}
+	if (rdev->pm.req_vblank & (1 << 1)) {
+		rdev->pm.req_vblank &= ~(1 << 1);
+		drm_vblank_put(rdev->ddev, 1);
+	}
+
+	radeon_pm_set_clocks_locked(rdev);
+	mutex_unlock(&rdev->cp.mutex);
+}
+
+static void radeon_pm_idle_work_handler(struct work_struct *work)
+{
+	struct radeon_device *rdev;
+	rdev = container_of(work, struct radeon_device,
+				pm.idle_work.work);
+
+	mutex_lock(&rdev->pm.mutex);
+	if (rdev->pm.state == PM_STATE_ACTIVE) {
+		unsigned long irq_flags;
+		int not_processed = 0;
+
+		read_lock_irqsave(&rdev->fence_drv.lock, irq_flags);
+		if (!list_empty(&rdev->fence_drv.emited)) {
+			struct list_head *ptr;
+			list_for_each(ptr, &rdev->fence_drv.emited) {
+				/* count up to 3, that's enought info */
+				if (++not_processed >= 3)
+					break;
+			}
+		}
+		read_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags);
+
+		if (not_processed >= 3) { /* should upclock */
+			if (rdev->pm.planned_action == PM_ACTION_DOWNCLOCK) {
+				rdev->pm.planned_action = PM_ACTION_NONE;
+			} else if (rdev->pm.planned_action == PM_ACTION_NONE &&
+				rdev->pm.downclocked) {
+				rdev->pm.planned_action =
+					PM_ACTION_UPCLOCK;
+				rdev->pm.action_timeout = jiffies +
+				msecs_to_jiffies(RADEON_RECLOCK_DELAY_MS);
+			}
+		} else if (not_processed == 0) { /* should downclock */
+			if (rdev->pm.planned_action == PM_ACTION_UPCLOCK) {
+				rdev->pm.planned_action = PM_ACTION_NONE;
+			} else if (rdev->pm.planned_action == PM_ACTION_NONE &&
+				!rdev->pm.downclocked) {
+				rdev->pm.planned_action =
+					PM_ACTION_DOWNCLOCK;
+				rdev->pm.action_timeout = jiffies +
+				msecs_to_jiffies(RADEON_RECLOCK_DELAY_MS);
+			}
+		}
+
+		if (rdev->pm.planned_action != PM_ACTION_NONE &&
+		    jiffies > rdev->pm.action_timeout) {
+			radeon_pm_set_clocks(rdev);
+		}
+	}
+	mutex_unlock(&rdev->pm.mutex);
+
+	queue_delayed_work(rdev->wq, &rdev->pm.idle_work,
+					msecs_to_jiffies(RADEON_IDLE_LOOP_MS));
+}
+
 /*
  * Debugfs info
  */
@@ -44,11 +436,14 @@
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
+	seq_printf(m, "state: %s\n", pm_state_names[rdev->pm.state]);
 	seq_printf(m, "default engine clock: %u0 kHz\n", rdev->clock.default_sclk);
 	seq_printf(m, "current engine clock: %u0 kHz\n", radeon_get_engine_clock(rdev));
 	seq_printf(m, "default memory clock: %u0 kHz\n", rdev->clock.default_mclk);
 	if (rdev->asic->get_memory_clock)
 		seq_printf(m, "current memory clock: %u0 kHz\n", radeon_get_memory_clock(rdev));
+	if (rdev->asic->get_pcie_lanes)
+		seq_printf(m, "PCIE lanes: %d\n", radeon_get_pcie_lanes(rdev));
 
 	return 0;
 }
@@ -58,7 +453,7 @@
 };
 #endif
 
-int radeon_debugfs_pm_init(struct radeon_device *rdev)
+static int radeon_debugfs_pm_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	return radeon_debugfs_add_files(rdev, radeon_pm_info_list, ARRAY_SIZE(radeon_pm_info_list));
diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h
index 6d0a009..5c0dc08 100644
--- a/drivers/gpu/drm/radeon/radeon_reg.h
+++ b/drivers/gpu/drm/radeon/radeon_reg.h
@@ -54,7 +54,7 @@
 #include "r300_reg.h"
 #include "r500_reg.h"
 #include "r600_reg.h"
-
+#include "evergreen_reg.h"
 
 #define RADEON_MC_AGP_LOCATION		0x014c
 #define		RADEON_MC_AGP_START_MASK	0x0000FFFF
@@ -1060,32 +1060,38 @@
 
        /* Multimedia I2C bus */
 #define RADEON_I2C_CNTL_0		    0x0090
-#define RADEON_I2C_DONE                     (1 << 0)
-#define RADEON_I2C_NACK                     (1 << 1)
-#define RADEON_I2C_HALT                     (1 << 2)
-#define RADEON_I2C_SOFT_RST                 (1 << 5)
-#define RADEON_I2C_DRIVE_EN                 (1 << 6)
-#define RADEON_I2C_DRIVE_SEL                (1 << 7)
-#define RADEON_I2C_START                    (1 << 8)
-#define RADEON_I2C_STOP                     (1 << 9)
-#define RADEON_I2C_RECEIVE                  (1 << 10)
-#define RADEON_I2C_ABORT                    (1 << 11)
-#define RADEON_I2C_GO                       (1 << 12)
-#define RADEON_I2C_PRESCALE_SHIFT           16
+#       define RADEON_I2C_DONE              (1 << 0)
+#       define RADEON_I2C_NACK              (1 << 1)
+#       define RADEON_I2C_HALT              (1 << 2)
+#       define RADEON_I2C_SOFT_RST          (1 << 5)
+#       define RADEON_I2C_DRIVE_EN          (1 << 6)
+#       define RADEON_I2C_DRIVE_SEL         (1 << 7)
+#       define RADEON_I2C_START             (1 << 8)
+#       define RADEON_I2C_STOP              (1 << 9)
+#       define RADEON_I2C_RECEIVE           (1 << 10)
+#       define RADEON_I2C_ABORT             (1 << 11)
+#       define RADEON_I2C_GO                (1 << 12)
+#       define RADEON_I2C_PRESCALE_SHIFT    16
 #define RADEON_I2C_CNTL_1                   0x0094
-#define RADEON_I2C_DATA_COUNT_SHIFT         0
-#define RADEON_I2C_ADDR_COUNT_SHIFT         4
-#define RADEON_I2C_INTRA_BYTE_DELAY_SHIFT   8
-#define RADEON_I2C_SEL                      (1 << 16)
-#define RADEON_I2C_EN                       (1 << 17)
-#define RADEON_I2C_TIME_LIMIT_SHIFT         24
+#       define RADEON_I2C_DATA_COUNT_SHIFT  0
+#       define RADEON_I2C_ADDR_COUNT_SHIFT  4
+#       define RADEON_I2C_INTRA_BYTE_DELAY_SHIFT   8
+#       define RADEON_I2C_SEL               (1 << 16)
+#       define RADEON_I2C_EN                (1 << 17)
+#       define RADEON_I2C_TIME_LIMIT_SHIFT  24
 #define RADEON_I2C_DATA			    0x0098
 
 #define RADEON_DVI_I2C_CNTL_0		    0x02e0
 #       define R200_DVI_I2C_PIN_SEL(x)      ((x) << 3)
-#       define R200_SEL_DDC1                0 /* 0x60 - VGA_DDC */
-#       define R200_SEL_DDC2                1 /* 0x64 - DVI_DDC */
-#       define R200_SEL_DDC3                2 /* 0x68 - MONID_DDC */
+#       define R200_SEL_DDC1                0 /* depends on asic */
+#       define R200_SEL_DDC2                1 /* depends on asic */
+#       define R200_SEL_DDC3                2 /* depends on asic */
+#	define RADEON_SW_WANTS_TO_USE_DVI_I2C (1 << 13)
+#	define RADEON_SW_CAN_USE_DVI_I2C      (1 << 13)
+#	define RADEON_SW_DONE_USING_DVI_I2C   (1 << 14)
+#	define RADEON_HW_NEEDS_DVI_I2C        (1 << 14)
+#	define RADEON_ABORT_HW_DVI_I2C        (1 << 15)
+#	define RADEON_HW_USING_DVI_I2C        (1 << 15)
 #define RADEON_DVI_I2C_CNTL_1               0x02e4
 #define RADEON_DVI_I2C_DATA		    0x02e8
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 6579eb4..e50513a 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -34,6 +34,36 @@
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
 
+void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
+{
+	struct radeon_ib *ib, *n;
+
+	list_for_each_entry_safe(ib, n, &rdev->ib_pool.bogus_ib, list) {
+		list_del(&ib->list);
+		vfree(ib->ptr);
+		kfree(ib);
+	}
+}
+
+void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ib *bib;
+
+	bib = kmalloc(sizeof(*bib), GFP_KERNEL);
+	if (bib == NULL)
+		return;
+	bib->ptr = vmalloc(ib->length_dw * 4);
+	if (bib->ptr == NULL) {
+		kfree(bib);
+		return;
+	}
+	memcpy(bib->ptr, ib->ptr, ib->length_dw * 4);
+	bib->length_dw = ib->length_dw;
+	mutex_lock(&rdev->ib_pool.mutex);
+	list_add_tail(&bib->list, &rdev->ib_pool.bogus_ib);
+	mutex_unlock(&rdev->ib_pool.mutex);
+}
+
 /*
  * IB.
  */
@@ -142,6 +172,7 @@
 
 	if (rdev->ib_pool.robj)
 		return 0;
+	INIT_LIST_HEAD(&rdev->ib_pool.bogus_ib);
 	/* Allocate 1M object buffer */
 	r = radeon_bo_create(rdev, NULL,  RADEON_IB_POOL_SIZE*64*1024,
 				true, RADEON_GEM_DOMAIN_GTT,
@@ -192,6 +223,8 @@
 		return;
 	}
 	mutex_lock(&rdev->ib_pool.mutex);
+	radeon_ib_bogus_cleanup(rdev);
+
 	if (rdev->ib_pool.robj) {
 		r = radeon_bo_reserve(rdev->ib_pool.robj, false);
 		if (likely(r == 0)) {
@@ -349,15 +382,49 @@
 	return 0;
 }
 
+static int radeon_debugfs_ib_bogus_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct radeon_device *rdev = node->info_ent->data;
+	struct radeon_ib *ib;
+	unsigned i;
+
+	mutex_lock(&rdev->ib_pool.mutex);
+	if (list_empty(&rdev->ib_pool.bogus_ib)) {
+		mutex_unlock(&rdev->ib_pool.mutex);
+		seq_printf(m, "no bogus IB recorded\n");
+		return 0;
+	}
+	ib = list_first_entry(&rdev->ib_pool.bogus_ib, struct radeon_ib, list);
+	list_del_init(&ib->list);
+	mutex_unlock(&rdev->ib_pool.mutex);
+	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
+	for (i = 0; i < ib->length_dw; i++) {
+		seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]);
+	}
+	vfree(ib->ptr);
+	kfree(ib);
+	return 0;
+}
+
 static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
 static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
+
+static struct drm_info_list radeon_debugfs_ib_bogus_info_list[] = {
+	{"radeon_ib_bogus", radeon_debugfs_ib_bogus_info, 0, NULL},
+};
 #endif
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	unsigned i;
+	int r;
 
+	radeon_debugfs_ib_bogus_info_list[0].data = rdev;
+	r = radeon_debugfs_add_files(rdev, radeon_debugfs_ib_bogus_info_list, 1);
+	if (r)
+		return r;
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i);
 		radeon_debugfs_ib_list[i].name = radeon_debugfs_ib_names[i];
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 067167c..3c32f84 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -29,6 +29,7 @@
 
 #include "drmP.h"
 #include "drm.h"
+#include "drm_buffer.h"
 #include "drm_sarea.h"
 #include "radeon_drm.h"
 #include "radeon_drv.h"
@@ -91,21 +92,27 @@
 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 						     dev_priv,
 						     struct drm_file *file_priv,
-						     int id, u32 *data)
+						     int id, struct drm_buffer *buf)
 {
+	u32 *data;
 	switch (id) {
 
 	case RADEON_EMIT_PP_MISC:
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
-		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
+		data = drm_buffer_pointer_to_dword(buf,
+			(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
+
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
 			DRM_ERROR("Invalid depth buffer offset\n");
 			return -EINVAL;
 		}
+		dev_priv->have_z_offset = 1;
 		break;
 
 	case RADEON_EMIT_PP_CNTL:
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
-		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
+		data = drm_buffer_pointer_to_dword(buf,
+			(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
+
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
 			DRM_ERROR("Invalid colour buffer offset\n");
 			return -EINVAL;
 		}
@@ -117,8 +124,8 @@
 	case R200_EMIT_PP_TXOFFSET_3:
 	case R200_EMIT_PP_TXOFFSET_4:
 	case R200_EMIT_PP_TXOFFSET_5:
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
-						  &data[0])) {
+		data = drm_buffer_pointer_to_dword(buf, 0);
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
 			DRM_ERROR("Invalid R200 texture offset\n");
 			return -EINVAL;
 		}
@@ -127,8 +134,9 @@
 	case RADEON_EMIT_PP_TXFILTER_0:
 	case RADEON_EMIT_PP_TXFILTER_1:
 	case RADEON_EMIT_PP_TXFILTER_2:
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
-		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
+		data = drm_buffer_pointer_to_dword(buf,
+			(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
 			DRM_ERROR("Invalid R100 texture offset\n");
 			return -EINVAL;
 		}
@@ -142,9 +150,10 @@
 	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
 			int i;
 			for (i = 0; i < 5; i++) {
+				data = drm_buffer_pointer_to_dword(buf, i);
 				if (radeon_check_and_fixup_offset(dev_priv,
 								  file_priv,
-								  &data[i])) {
+								  data)) {
 					DRM_ERROR
 					    ("Invalid R200 cubic texture offset\n");
 					return -EINVAL;
@@ -158,9 +167,10 @@
 	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
 			int i;
 			for (i = 0; i < 5; i++) {
+				data = drm_buffer_pointer_to_dword(buf, i);
 				if (radeon_check_and_fixup_offset(dev_priv,
 								  file_priv,
-								  &data[i])) {
+								  data)) {
 					DRM_ERROR
 					    ("Invalid R100 cubic texture offset\n");
 					return -EINVAL;
@@ -269,23 +279,24 @@
 						     cmdbuf,
 						     unsigned int *cmdsz)
 {
-	u32 *cmd = (u32 *) cmdbuf->buf;
+	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	u32 offset, narrays;
 	int count, i, k;
 
-	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+	count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+	*cmdsz = 2 + count;
 
-	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
+	if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
 		DRM_ERROR("Not a type 3 packet\n");
 		return -EINVAL;
 	}
 
-	if (4 * *cmdsz > cmdbuf->bufsz) {
+	if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
 		DRM_ERROR("Packet size larger than size of data provided\n");
 		return -EINVAL;
 	}
 
-	switch(cmd[0] & 0xff00) {
+	switch (*cmd & 0xff00) {
 	/* XXX Are there old drivers needing other packets? */
 
 	case RADEON_3D_DRAW_IMMD:
@@ -312,7 +323,6 @@
 		break;
 
 	case RADEON_3D_LOAD_VBPNTR:
-		count = (cmd[0] >> 16) & 0x3fff;
 
 		if (count > 18) { /* 12 arrays max */
 			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
@@ -321,13 +331,16 @@
 		}
 
 		/* carefully check packet contents */
-		narrays = cmd[1] & ~0xc000;
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+
+		narrays = *cmd & ~0xc000;
 		k = 0;
 		i = 2;
 		while ((k < narrays) && (i < (count + 2))) {
 			i++;		/* skip attribute field */
+			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
-							  &cmd[i])) {
+							  cmd)) {
 				DRM_ERROR
 				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
 				     k, i);
@@ -338,8 +351,10 @@
 			if (k == narrays)
 				break;
 			/* have one more to process, they come in pairs */
+			cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
+
 			if (radeon_check_and_fixup_offset(dev_priv,
-							  file_priv, &cmd[i]))
+							  file_priv, cmd))
 			{
 				DRM_ERROR
 				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
@@ -363,7 +378,9 @@
 			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
 			return -EINVAL;
 		}
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
+
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
 				DRM_ERROR("Invalid rndr_gen_indx offset\n");
 				return -EINVAL;
 		}
@@ -374,12 +391,15 @@
 			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
 			return -EINVAL;
 		}
-		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
-			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
+
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+		if ((*cmd & 0x8000ffff) != 0x80000810) {
+			DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
 			return -EINVAL;
 		}
-		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
-			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+		if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
+			DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
 			return -EINVAL;
 		}
 		break;
@@ -388,31 +408,34 @@
 	case RADEON_CNTL_PAINT_MULTI:
 	case RADEON_CNTL_BITBLT_MULTI:
 		/* MSB of opcode: next DWORD GUI_CNTL */
-		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
+		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
+		if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-			offset = cmd[2] << 10;
+			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
+			offset = *cmd2 << 10;
 			if (radeon_check_and_fixup_offset
 			    (dev_priv, file_priv, &offset)) {
 				DRM_ERROR("Invalid first packet offset\n");
 				return -EINVAL;
 			}
-			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
+			*cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
 		}
 
-		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
-		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
-			offset = cmd[3] << 10;
+		if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
+		    (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
+			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
+			offset = *cmd << 10;
 			if (radeon_check_and_fixup_offset
 			    (dev_priv, file_priv, &offset)) {
 				DRM_ERROR("Invalid second packet offset\n");
 				return -EINVAL;
 			}
-			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
+			*cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
 		}
 		break;
 
 	default:
-		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
+		DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
 		return -EINVAL;
 	}
 
@@ -876,6 +899,11 @@
 		if (tmp & RADEON_BACK)
 			flags |= RADEON_FRONT;
 	}
+	if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
+		if (!dev_priv->have_z_offset)
+			printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
+		flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
+	}
 
 	if (flags & (RADEON_FRONT | RADEON_BACK)) {
 
@@ -2611,7 +2639,6 @@
 {
 	int id = (int)header.packet.packet_id;
 	int sz, reg;
-	int *data = (int *)cmdbuf->buf;
 	RING_LOCALS;
 
 	if (id >= RADEON_MAX_STATE_PACKETS)
@@ -2620,23 +2647,22 @@
 	sz = packet[id].len;
 	reg = packet[id].start;
 
-	if (sz * sizeof(int) > cmdbuf->bufsz) {
+	if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
 		DRM_ERROR("Packet size provided larger than data provided\n");
 		return -EINVAL;
 	}
 
-	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
+	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
+				cmdbuf->buffer)) {
 		DRM_ERROR("Packet verification failed\n");
 		return -EINVAL;
 	}
 
 	BEGIN_RING(sz + 1);
 	OUT_RING(CP_PACKET0(reg, (sz - 1)));
-	OUT_RING_TABLE(data, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * sizeof(int);
-	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
 
@@ -2653,10 +2679,8 @@
 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
-	OUT_RING_TABLE(cmdbuf->buf, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
-	cmdbuf->buf += sz * sizeof(int);
-	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
 
@@ -2675,10 +2699,8 @@
 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
-	OUT_RING_TABLE(cmdbuf->buf, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
-	cmdbuf->buf += sz * sizeof(int);
-	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
 
@@ -2696,11 +2718,9 @@
 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
-	OUT_RING_TABLE(cmdbuf->buf, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * sizeof(int);
-	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
 
@@ -2714,7 +2734,7 @@
 
         if (!sz)
                 return 0;
-        if (sz * 4 > cmdbuf->bufsz)
+	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
                 return -EINVAL;
 
 	BEGIN_RING(5 + sz);
@@ -2722,11 +2742,9 @@
 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
-	OUT_RING_TABLE(cmdbuf->buf, sz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += sz * sizeof(int);
-	cmdbuf->bufsz -= sz * sizeof(int);
 	return 0;
 }
 
@@ -2748,11 +2766,9 @@
 	}
 
 	BEGIN_RING(cmdsz);
-	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
+	OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
 	ADVANCE_RING();
 
-	cmdbuf->buf += cmdsz * 4;
-	cmdbuf->bufsz -= cmdsz * 4;
 	return 0;
 }
 
@@ -2805,16 +2821,16 @@
 		}
 
 		BEGIN_RING(cmdsz);
-		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
+		OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
 		ADVANCE_RING();
 
 	} while (++i < cmdbuf->nbox);
 	if (cmdbuf->nbox == 1)
 		cmdbuf->nbox = 0;
 
+	return 0;
       out:
-	cmdbuf->buf += cmdsz * 4;
-	cmdbuf->bufsz -= cmdsz * 4;
+	drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
 	return 0;
 }
 
@@ -2847,16 +2863,16 @@
 	return 0;
 }
 
-static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
+static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf *buf = NULL;
+	drm_radeon_cmd_header_t stack_header;
 	int idx;
 	drm_radeon_kcmd_buffer_t *cmdbuf = data;
-	drm_radeon_cmd_header_t header;
-	int orig_nbox, orig_bufsz;
-	char *kbuf = NULL;
+	int orig_nbox;
 
 	LOCK_TEST_WITH_RETURN(dev, file_priv);
 
@@ -2871,17 +2887,16 @@
 	 * races between checking values and using those values in other code,
 	 * and simply to avoid a lot of function calls to copy in data.
 	 */
-	orig_bufsz = cmdbuf->bufsz;
-	if (orig_bufsz != 0) {
-		kbuf = kmalloc(cmdbuf->bufsz, GFP_KERNEL);
-		if (kbuf == NULL)
-			return -ENOMEM;
-		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
-				       cmdbuf->bufsz)) {
-			kfree(kbuf);
-			return -EFAULT;
-		}
-		cmdbuf->buf = kbuf;
+	if (cmdbuf->bufsz != 0) {
+		int rv;
+		void __user *buffer = cmdbuf->buffer;
+		rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
+		if (rv)
+			return rv;
+		rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
+						cmdbuf->bufsz);
+		if (rv)
+			return rv;
 	}
 
 	orig_nbox = cmdbuf->nbox;
@@ -2890,24 +2905,24 @@
 		int temp;
 		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
 
-		if (orig_bufsz != 0)
-			kfree(kbuf);
+		if (cmdbuf->bufsz != 0)
+			drm_buffer_free(cmdbuf->buffer);
 
 		return temp;
 	}
 
 	/* microcode_version != r300 */
-	while (cmdbuf->bufsz >= sizeof(header)) {
+	while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
 
-		header.i = *(int *)cmdbuf->buf;
-		cmdbuf->buf += sizeof(header);
-		cmdbuf->bufsz -= sizeof(header);
+		drm_radeon_cmd_header_t *header;
+		header = drm_buffer_read_object(cmdbuf->buffer,
+				sizeof(stack_header), &stack_header);
 
-		switch (header.header.cmd_type) {
+		switch (header->header.cmd_type) {
 		case RADEON_CMD_PACKET:
 			DRM_DEBUG("RADEON_CMD_PACKET\n");
 			if (radeon_emit_packets
-			    (dev_priv, file_priv, header, cmdbuf)) {
+			    (dev_priv, file_priv, *header, cmdbuf)) {
 				DRM_ERROR("radeon_emit_packets failed\n");
 				goto err;
 			}
@@ -2915,7 +2930,7 @@
 
 		case RADEON_CMD_SCALARS:
 			DRM_DEBUG("RADEON_CMD_SCALARS\n");
-			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
+			if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
 				DRM_ERROR("radeon_emit_scalars failed\n");
 				goto err;
 			}
@@ -2923,7 +2938,7 @@
 
 		case RADEON_CMD_VECTORS:
 			DRM_DEBUG("RADEON_CMD_VECTORS\n");
-			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
+			if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
 				DRM_ERROR("radeon_emit_vectors failed\n");
 				goto err;
 			}
@@ -2931,7 +2946,7 @@
 
 		case RADEON_CMD_DMA_DISCARD:
 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
-			idx = header.dma.buf_idx;
+			idx = header->dma.buf_idx;
 			if (idx < 0 || idx >= dma->buf_count) {
 				DRM_ERROR("buffer index %d (of %d max)\n",
 					  idx, dma->buf_count - 1);
@@ -2968,7 +2983,7 @@
 
 		case RADEON_CMD_SCALARS2:
 			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
-			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
+			if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
 				DRM_ERROR("radeon_emit_scalars2 failed\n");
 				goto err;
 			}
@@ -2976,37 +2991,37 @@
 
 		case RADEON_CMD_WAIT:
 			DRM_DEBUG("RADEON_CMD_WAIT\n");
-			if (radeon_emit_wait(dev, header.wait.flags)) {
+			if (radeon_emit_wait(dev, header->wait.flags)) {
 				DRM_ERROR("radeon_emit_wait failed\n");
 				goto err;
 			}
 			break;
 		case RADEON_CMD_VECLINEAR:
 			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
-			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
+			if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
 				DRM_ERROR("radeon_emit_veclinear failed\n");
 				goto err;
 			}
 			break;
 
 		default:
-			DRM_ERROR("bad cmd_type %d at %p\n",
-				  header.header.cmd_type,
-				  cmdbuf->buf - sizeof(header));
+			DRM_ERROR("bad cmd_type %d at byte %d\n",
+				  header->header.cmd_type,
+				  cmdbuf->buffer->iterator);
 			goto err;
 		}
 	}
 
-	if (orig_bufsz != 0)
-		kfree(kbuf);
+	if (cmdbuf->bufsz != 0)
+		drm_buffer_free(cmdbuf->buffer);
 
 	DRM_DEBUG("DONE\n");
 	COMMIT_RING();
 	return 0;
 
       err:
-	if (orig_bufsz != 0)
-		kfree(kbuf);
+	if (cmdbuf->bufsz != 0)
+		drm_buffer_free(cmdbuf->buffer);
 	return -EINVAL;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 9f5e2f9..313c96b 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -186,7 +186,7 @@
 		radeon_bo_kunmap(gtt_obj[i]);
 
 		DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
-			 gtt_addr - rdev->mc.gtt_location);
+			 gtt_addr - rdev->mc.gtt_start);
 	}
 
 out_cleanup:
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 58b5adf..43c5ab3 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -150,7 +150,7 @@
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		break;
 	case TTM_PL_TT:
-		man->gpu_offset = rdev->mc.gtt_location;
+		man->gpu_offset = rdev->mc.gtt_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -180,7 +180,7 @@
 		break;
 	case TTM_PL_VRAM:
 		/* "On-card" video ram */
-		man->gpu_offset = rdev->mc.vram_location;
+		man->gpu_offset = rdev->mc.vram_start;
 		man->flags = TTM_MEMTYPE_FLAG_FIXED |
 			     TTM_MEMTYPE_FLAG_NEEDS_IOREMAP |
 			     TTM_MEMTYPE_FLAG_MAPPABLE;
@@ -262,10 +262,10 @@
 
 	switch (old_mem->mem_type) {
 	case TTM_PL_VRAM:
-		old_start += rdev->mc.vram_location;
+		old_start += rdev->mc.vram_start;
 		break;
 	case TTM_PL_TT:
-		old_start += rdev->mc.gtt_location;
+		old_start += rdev->mc.gtt_start;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
@@ -273,10 +273,10 @@
 	}
 	switch (new_mem->mem_type) {
 	case TTM_PL_VRAM:
-		new_start += rdev->mc.vram_location;
+		new_start += rdev->mc.vram_start;
 		break;
 	case TTM_PL_TT:
-		new_start += rdev->mc.gtt_location;
+		new_start += rdev->mc.gtt_start;
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
new file mode 100644
index 0000000..8f414a5
--- /dev/null
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -0,0 +1,837 @@
+r600 0x9400
+0x000287A0 R7xx_CB_SHADER_CONTROL
+0x00028230 R7xx_PA_SC_EDGERULE
+0x000286C8 R7xx_SPI_THREAD_GROUPING
+0x00008D8C R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
+0x000088C4 VGT_CACHE_INVALIDATION
+0x00028A50 VGT_ENHANCE
+0x000088CC VGT_ES_PER_GS
+0x00028A2C VGT_GROUP_DECR
+0x00028A28 VGT_GROUP_FIRST_DECR
+0x00028A24 VGT_GROUP_PRIM_TYPE
+0x00028A30 VGT_GROUP_VECT_0_CNTL
+0x00028A38 VGT_GROUP_VECT_0_FMT_CNTL
+0x00028A34 VGT_GROUP_VECT_1_CNTL
+0x00028A3C VGT_GROUP_VECT_1_FMT_CNTL
+0x00028A40 VGT_GS_MODE
+0x00028A6C VGT_GS_OUT_PRIM_TYPE
+0x000088C8 VGT_GS_PER_ES
+0x000088E8 VGT_GS_PER_VS
+0x000088D4 VGT_GS_VERTEX_REUSE
+0x00028A14 VGT_HOS_CNTL
+0x00028A18 VGT_HOS_MAX_TESS_LEVEL
+0x00028A1C VGT_HOS_MIN_TESS_LEVEL
+0x00028A20 VGT_HOS_REUSE_DEPTH
+0x0000895C VGT_INDEX_TYPE
+0x00028408 VGT_INDX_OFFSET
+0x00028AA0 VGT_INSTANCE_STEP_RATE_0
+0x00028AA4 VGT_INSTANCE_STEP_RATE_1
+0x000088C0 VGT_LAST_COPY_STATE
+0x00028400 VGT_MAX_VTX_INDX
+0x000088D8 VGT_MC_LAT_CNTL
+0x00028404 VGT_MIN_VTX_INDX
+0x00028A94 VGT_MULTI_PRIM_IB_RESET_EN
+0x0002840C VGT_MULTI_PRIM_IB_RESET_INDX
+0x00008970 VGT_NUM_INDICES
+0x00008974 VGT_NUM_INSTANCES
+0x00028A10 VGT_OUTPUT_PATH_CNTL
+0x00028C5C VGT_OUT_DEALLOC_CNTL
+0x00028A84 VGT_PRIMITIVEID_EN
+0x00008958 VGT_PRIMITIVE_TYPE
+0x00028AB4 VGT_REUSE_OFF
+0x00028C58 VGT_VERTEX_REUSE_BLOCK_CNTL
+0x00028AB8 VGT_VTX_CNT_EN
+0x000088B0 VGT_VTX_VECT_EJECT_REG
+0x00028810 PA_CL_CLIP_CNTL
+0x00008A14 PA_CL_ENHANCE
+0x00028C14 PA_CL_GB_HORZ_CLIP_ADJ
+0x00028C18 PA_CL_GB_HORZ_DISC_ADJ
+0x00028C0C PA_CL_GB_VERT_CLIP_ADJ
+0x00028C10 PA_CL_GB_VERT_DISC_ADJ
+0x00028820 PA_CL_NANINF_CNTL
+0x00028E1C PA_CL_POINT_CULL_RAD
+0x00028E18 PA_CL_POINT_SIZE
+0x00028E10 PA_CL_POINT_X_RAD
+0x00028E14 PA_CL_POINT_Y_RAD
+0x00028E2C PA_CL_UCP_0_W
+0x00028E3C PA_CL_UCP_1_W
+0x00028E4C PA_CL_UCP_2_W
+0x00028E5C PA_CL_UCP_3_W
+0x00028E6C PA_CL_UCP_4_W
+0x00028E7C PA_CL_UCP_5_W
+0x00028E20 PA_CL_UCP_0_X
+0x00028E30 PA_CL_UCP_1_X
+0x00028E40 PA_CL_UCP_2_X
+0x00028E50 PA_CL_UCP_3_X
+0x00028E60 PA_CL_UCP_4_X
+0x00028E70 PA_CL_UCP_5_X
+0x00028E24 PA_CL_UCP_0_Y
+0x00028E34 PA_CL_UCP_1_Y
+0x00028E44 PA_CL_UCP_2_Y
+0x00028E54 PA_CL_UCP_3_Y
+0x00028E64 PA_CL_UCP_4_Y
+0x00028E74 PA_CL_UCP_5_Y
+0x00028E28 PA_CL_UCP_0_Z
+0x00028E38 PA_CL_UCP_1_Z
+0x00028E48 PA_CL_UCP_2_Z
+0x00028E58 PA_CL_UCP_3_Z
+0x00028E68 PA_CL_UCP_4_Z
+0x00028E78 PA_CL_UCP_5_Z
+0x00028440 PA_CL_VPORT_XOFFSET_0
+0x00028458 PA_CL_VPORT_XOFFSET_1
+0x00028470 PA_CL_VPORT_XOFFSET_2
+0x00028488 PA_CL_VPORT_XOFFSET_3
+0x000284A0 PA_CL_VPORT_XOFFSET_4
+0x000284B8 PA_CL_VPORT_XOFFSET_5
+0x000284D0 PA_CL_VPORT_XOFFSET_6
+0x000284E8 PA_CL_VPORT_XOFFSET_7
+0x00028500 PA_CL_VPORT_XOFFSET_8
+0x00028518 PA_CL_VPORT_XOFFSET_9
+0x00028530 PA_CL_VPORT_XOFFSET_10
+0x00028548 PA_CL_VPORT_XOFFSET_11
+0x00028560 PA_CL_VPORT_XOFFSET_12
+0x00028578 PA_CL_VPORT_XOFFSET_13
+0x00028590 PA_CL_VPORT_XOFFSET_14
+0x000285A8 PA_CL_VPORT_XOFFSET_15
+0x0002843C PA_CL_VPORT_XSCALE_0
+0x00028454 PA_CL_VPORT_XSCALE_1
+0x0002846C PA_CL_VPORT_XSCALE_2
+0x00028484 PA_CL_VPORT_XSCALE_3
+0x0002849C PA_CL_VPORT_XSCALE_4
+0x000284B4 PA_CL_VPORT_XSCALE_5
+0x000284CC PA_CL_VPORT_XSCALE_6
+0x000284E4 PA_CL_VPORT_XSCALE_7
+0x000284FC PA_CL_VPORT_XSCALE_8
+0x00028514 PA_CL_VPORT_XSCALE_9
+0x0002852C PA_CL_VPORT_XSCALE_10
+0x00028544 PA_CL_VPORT_XSCALE_11
+0x0002855C PA_CL_VPORT_XSCALE_12
+0x00028574 PA_CL_VPORT_XSCALE_13
+0x0002858C PA_CL_VPORT_XSCALE_14
+0x000285A4 PA_CL_VPORT_XSCALE_15
+0x00028448 PA_CL_VPORT_YOFFSET_0
+0x00028460 PA_CL_VPORT_YOFFSET_1
+0x00028478 PA_CL_VPORT_YOFFSET_2
+0x00028490 PA_CL_VPORT_YOFFSET_3
+0x000284A8 PA_CL_VPORT_YOFFSET_4
+0x000284C0 PA_CL_VPORT_YOFFSET_5
+0x000284D8 PA_CL_VPORT_YOFFSET_6
+0x000284F0 PA_CL_VPORT_YOFFSET_7
+0x00028508 PA_CL_VPORT_YOFFSET_8
+0x00028520 PA_CL_VPORT_YOFFSET_9
+0x00028538 PA_CL_VPORT_YOFFSET_10
+0x00028550 PA_CL_VPORT_YOFFSET_11
+0x00028568 PA_CL_VPORT_YOFFSET_12
+0x00028580 PA_CL_VPORT_YOFFSET_13
+0x00028598 PA_CL_VPORT_YOFFSET_14
+0x000285B0 PA_CL_VPORT_YOFFSET_15
+0x00028444 PA_CL_VPORT_YSCALE_0
+0x0002845C PA_CL_VPORT_YSCALE_1
+0x00028474 PA_CL_VPORT_YSCALE_2
+0x0002848C PA_CL_VPORT_YSCALE_3
+0x000284A4 PA_CL_VPORT_YSCALE_4
+0x000284BC PA_CL_VPORT_YSCALE_5
+0x000284D4 PA_CL_VPORT_YSCALE_6
+0x000284EC PA_CL_VPORT_YSCALE_7
+0x00028504 PA_CL_VPORT_YSCALE_8
+0x0002851C PA_CL_VPORT_YSCALE_9
+0x00028534 PA_CL_VPORT_YSCALE_10
+0x0002854C PA_CL_VPORT_YSCALE_11
+0x00028564 PA_CL_VPORT_YSCALE_12
+0x0002857C PA_CL_VPORT_YSCALE_13
+0x00028594 PA_CL_VPORT_YSCALE_14
+0x000285AC PA_CL_VPORT_YSCALE_15
+0x00028450 PA_CL_VPORT_ZOFFSET_0
+0x00028468 PA_CL_VPORT_ZOFFSET_1
+0x00028480 PA_CL_VPORT_ZOFFSET_2
+0x00028498 PA_CL_VPORT_ZOFFSET_3
+0x000284B0 PA_CL_VPORT_ZOFFSET_4
+0x000284C8 PA_CL_VPORT_ZOFFSET_5
+0x000284E0 PA_CL_VPORT_ZOFFSET_6
+0x000284F8 PA_CL_VPORT_ZOFFSET_7
+0x00028510 PA_CL_VPORT_ZOFFSET_8
+0x00028528 PA_CL_VPORT_ZOFFSET_9
+0x00028540 PA_CL_VPORT_ZOFFSET_10
+0x00028558 PA_CL_VPORT_ZOFFSET_11
+0x00028570 PA_CL_VPORT_ZOFFSET_12
+0x00028588 PA_CL_VPORT_ZOFFSET_13
+0x000285A0 PA_CL_VPORT_ZOFFSET_14
+0x000285B8 PA_CL_VPORT_ZOFFSET_15
+0x0002844C PA_CL_VPORT_ZSCALE_0
+0x00028464 PA_CL_VPORT_ZSCALE_1
+0x0002847C PA_CL_VPORT_ZSCALE_2
+0x00028494 PA_CL_VPORT_ZSCALE_3
+0x000284AC PA_CL_VPORT_ZSCALE_4
+0x000284C4 PA_CL_VPORT_ZSCALE_5
+0x000284DC PA_CL_VPORT_ZSCALE_6
+0x000284F4 PA_CL_VPORT_ZSCALE_7
+0x0002850C PA_CL_VPORT_ZSCALE_8
+0x00028524 PA_CL_VPORT_ZSCALE_9
+0x0002853C PA_CL_VPORT_ZSCALE_10
+0x00028554 PA_CL_VPORT_ZSCALE_11
+0x0002856C PA_CL_VPORT_ZSCALE_12
+0x00028584 PA_CL_VPORT_ZSCALE_13
+0x0002859C PA_CL_VPORT_ZSCALE_14
+0x000285B4 PA_CL_VPORT_ZSCALE_15
+0x0002881C PA_CL_VS_OUT_CNTL
+0x00028818 PA_CL_VTE_CNTL
+0x00028C48 PA_SC_AA_MASK
+0x00008B40 PA_SC_AA_SAMPLE_LOCS_2S
+0x00008B44 PA_SC_AA_SAMPLE_LOCS_4S
+0x00008B48 PA_SC_AA_SAMPLE_LOCS_8S_WD0
+0x00008B4C PA_SC_AA_SAMPLE_LOCS_8S_WD1
+0x00028C20 PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX
+0x00028C1C PA_SC_AA_SAMPLE_LOCS_MCTX
+0x00028214 PA_SC_CLIPRECT_0_BR
+0x0002821C PA_SC_CLIPRECT_1_BR
+0x00028224 PA_SC_CLIPRECT_2_BR
+0x0002822C PA_SC_CLIPRECT_3_BR
+0x00028210 PA_SC_CLIPRECT_0_TL
+0x00028218 PA_SC_CLIPRECT_1_TL
+0x00028220 PA_SC_CLIPRECT_2_TL
+0x00028228 PA_SC_CLIPRECT_3_TL
+0x0002820C PA_SC_CLIPRECT_RULE
+0x00008BF0 PA_SC_ENHANCE
+0x00028244 PA_SC_GENERIC_SCISSOR_BR
+0x00028240 PA_SC_GENERIC_SCISSOR_TL
+0x00028C00 PA_SC_LINE_CNTL
+0x00028A0C PA_SC_LINE_STIPPLE
+0x00008B10 PA_SC_LINE_STIPPLE_STATE
+0x00028A4C PA_SC_MODE_CNTL
+0x00028A48 PA_SC_MPASS_PS_CNTL
+0x00008B20 PA_SC_MULTI_CHIP_CNTL
+0x00028034 PA_SC_SCREEN_SCISSOR_BR
+0x00028030 PA_SC_SCREEN_SCISSOR_TL
+0x00028254 PA_SC_VPORT_SCISSOR_0_BR
+0x0002825C PA_SC_VPORT_SCISSOR_1_BR
+0x00028264 PA_SC_VPORT_SCISSOR_2_BR
+0x0002826C PA_SC_VPORT_SCISSOR_3_BR
+0x00028274 PA_SC_VPORT_SCISSOR_4_BR
+0x0002827C PA_SC_VPORT_SCISSOR_5_BR
+0x00028284 PA_SC_VPORT_SCISSOR_6_BR
+0x0002828C PA_SC_VPORT_SCISSOR_7_BR
+0x00028294 PA_SC_VPORT_SCISSOR_8_BR
+0x0002829C PA_SC_VPORT_SCISSOR_9_BR
+0x000282A4 PA_SC_VPORT_SCISSOR_10_BR
+0x000282AC PA_SC_VPORT_SCISSOR_11_BR
+0x000282B4 PA_SC_VPORT_SCISSOR_12_BR
+0x000282BC PA_SC_VPORT_SCISSOR_13_BR
+0x000282C4 PA_SC_VPORT_SCISSOR_14_BR
+0x000282CC PA_SC_VPORT_SCISSOR_15_BR
+0x00028250 PA_SC_VPORT_SCISSOR_0_TL
+0x00028258 PA_SC_VPORT_SCISSOR_1_TL
+0x00028260 PA_SC_VPORT_SCISSOR_2_TL
+0x00028268 PA_SC_VPORT_SCISSOR_3_TL
+0x00028270 PA_SC_VPORT_SCISSOR_4_TL
+0x00028278 PA_SC_VPORT_SCISSOR_5_TL
+0x00028280 PA_SC_VPORT_SCISSOR_6_TL
+0x00028288 PA_SC_VPORT_SCISSOR_7_TL
+0x00028290 PA_SC_VPORT_SCISSOR_8_TL
+0x00028298 PA_SC_VPORT_SCISSOR_9_TL
+0x000282A0 PA_SC_VPORT_SCISSOR_10_TL
+0x000282A8 PA_SC_VPORT_SCISSOR_11_TL
+0x000282B0 PA_SC_VPORT_SCISSOR_12_TL
+0x000282B8 PA_SC_VPORT_SCISSOR_13_TL
+0x000282C0 PA_SC_VPORT_SCISSOR_14_TL
+0x000282C8 PA_SC_VPORT_SCISSOR_15_TL
+0x000282D4 PA_SC_VPORT_ZMAX_0
+0x000282DC PA_SC_VPORT_ZMAX_1
+0x000282E4 PA_SC_VPORT_ZMAX_2
+0x000282EC PA_SC_VPORT_ZMAX_3
+0x000282F4 PA_SC_VPORT_ZMAX_4
+0x000282FC PA_SC_VPORT_ZMAX_5
+0x00028304 PA_SC_VPORT_ZMAX_6
+0x0002830C PA_SC_VPORT_ZMAX_7
+0x00028314 PA_SC_VPORT_ZMAX_8
+0x0002831C PA_SC_VPORT_ZMAX_9
+0x00028324 PA_SC_VPORT_ZMAX_10
+0x0002832C PA_SC_VPORT_ZMAX_11
+0x00028334 PA_SC_VPORT_ZMAX_12
+0x0002833C PA_SC_VPORT_ZMAX_13
+0x00028344 PA_SC_VPORT_ZMAX_14
+0x0002834C PA_SC_VPORT_ZMAX_15
+0x000282D0 PA_SC_VPORT_ZMIN_0
+0x000282D8 PA_SC_VPORT_ZMIN_1
+0x000282E0 PA_SC_VPORT_ZMIN_2
+0x000282E8 PA_SC_VPORT_ZMIN_3
+0x000282F0 PA_SC_VPORT_ZMIN_4
+0x000282F8 PA_SC_VPORT_ZMIN_5
+0x00028300 PA_SC_VPORT_ZMIN_6
+0x00028308 PA_SC_VPORT_ZMIN_7
+0x00028310 PA_SC_VPORT_ZMIN_8
+0x00028318 PA_SC_VPORT_ZMIN_9
+0x00028320 PA_SC_VPORT_ZMIN_10
+0x00028328 PA_SC_VPORT_ZMIN_11
+0x00028330 PA_SC_VPORT_ZMIN_12
+0x00028338 PA_SC_VPORT_ZMIN_13
+0x00028340 PA_SC_VPORT_ZMIN_14
+0x00028348 PA_SC_VPORT_ZMIN_15
+0x00028200 PA_SC_WINDOW_OFFSET
+0x00028208 PA_SC_WINDOW_SCISSOR_BR
+0x00028204 PA_SC_WINDOW_SCISSOR_TL
+0x00028A08 PA_SU_LINE_CNTL
+0x00028A04 PA_SU_POINT_MINMAX
+0x00028A00 PA_SU_POINT_SIZE
+0x00028E0C PA_SU_POLY_OFFSET_BACK_OFFSET
+0x00028E08 PA_SU_POLY_OFFSET_BACK_SCALE
+0x00028DFC PA_SU_POLY_OFFSET_CLAMP
+0x00028DF8 PA_SU_POLY_OFFSET_DB_FMT_CNTL
+0x00028E04 PA_SU_POLY_OFFSET_FRONT_OFFSET
+0x00028E00 PA_SU_POLY_OFFSET_FRONT_SCALE
+0x00028814 PA_SU_SC_MODE_CNTL
+0x00028C08 PA_SU_VTX_CNTL
+0x00008C00 SQ_CONFIG
+0x00008C04 SQ_GPR_RESOURCE_MGMT_1
+0x00008C08 SQ_GPR_RESOURCE_MGMT_2
+0x00008C10 SQ_STACK_RESOURCE_MGMT_1
+0x00008C14 SQ_STACK_RESOURCE_MGMT_2
+0x00008C0C SQ_THREAD_RESOURCE_MGMT
+0x00028380 SQ_VTX_SEMANTIC_0
+0x00028384 SQ_VTX_SEMANTIC_1
+0x00028388 SQ_VTX_SEMANTIC_2
+0x0002838C SQ_VTX_SEMANTIC_3
+0x00028390 SQ_VTX_SEMANTIC_4
+0x00028394 SQ_VTX_SEMANTIC_5
+0x00028398 SQ_VTX_SEMANTIC_6
+0x0002839C SQ_VTX_SEMANTIC_7
+0x000283A0 SQ_VTX_SEMANTIC_8
+0x000283A4 SQ_VTX_SEMANTIC_9
+0x000283A8 SQ_VTX_SEMANTIC_10
+0x000283AC SQ_VTX_SEMANTIC_11
+0x000283B0 SQ_VTX_SEMANTIC_12
+0x000283B4 SQ_VTX_SEMANTIC_13
+0x000283B8 SQ_VTX_SEMANTIC_14
+0x000283BC SQ_VTX_SEMANTIC_15
+0x000283C0 SQ_VTX_SEMANTIC_16
+0x000283C4 SQ_VTX_SEMANTIC_17
+0x000283C8 SQ_VTX_SEMANTIC_18
+0x000283CC SQ_VTX_SEMANTIC_19
+0x000283D0 SQ_VTX_SEMANTIC_20
+0x000283D4 SQ_VTX_SEMANTIC_21
+0x000283D8 SQ_VTX_SEMANTIC_22
+0x000283DC SQ_VTX_SEMANTIC_23
+0x000283E0 SQ_VTX_SEMANTIC_24
+0x000283E4 SQ_VTX_SEMANTIC_25
+0x000283E8 SQ_VTX_SEMANTIC_26
+0x000283EC SQ_VTX_SEMANTIC_27
+0x000283F0 SQ_VTX_SEMANTIC_28
+0x000283F4 SQ_VTX_SEMANTIC_29
+0x000283F8 SQ_VTX_SEMANTIC_30
+0x000283FC SQ_VTX_SEMANTIC_31
+0x000288E0 SQ_VTX_SEMANTIC_CLEAR
+0x0003CFF4 SQ_VTX_START_INST_LOC
+0x0003C000 SQ_TEX_SAMPLER_WORD0_0
+0x0003C004 SQ_TEX_SAMPLER_WORD1_0
+0x0003C008 SQ_TEX_SAMPLER_WORD2_0
+0x00030000 SQ_ALU_CONSTANT0_0
+0x00030004 SQ_ALU_CONSTANT1_0
+0x00030008 SQ_ALU_CONSTANT2_0
+0x0003000C SQ_ALU_CONSTANT3_0
+0x0003E380 SQ_BOOL_CONST_0
+0x0003E384 SQ_BOOL_CONST_1
+0x0003E388 SQ_BOOL_CONST_2
+0x0003E200 SQ_LOOP_CONST_0
+0x0003E200 SQ_LOOP_CONST_DX10_0
+0x000281C0 SQ_ALU_CONST_BUFFER_SIZE_GS_0
+0x000281C4 SQ_ALU_CONST_BUFFER_SIZE_GS_1
+0x000281C8 SQ_ALU_CONST_BUFFER_SIZE_GS_2
+0x000281CC SQ_ALU_CONST_BUFFER_SIZE_GS_3
+0x000281D0 SQ_ALU_CONST_BUFFER_SIZE_GS_4
+0x000281D4 SQ_ALU_CONST_BUFFER_SIZE_GS_5
+0x000281D8 SQ_ALU_CONST_BUFFER_SIZE_GS_6
+0x000281DC SQ_ALU_CONST_BUFFER_SIZE_GS_7
+0x000281E0 SQ_ALU_CONST_BUFFER_SIZE_GS_8
+0x000281E4 SQ_ALU_CONST_BUFFER_SIZE_GS_9
+0x000281E8 SQ_ALU_CONST_BUFFER_SIZE_GS_10
+0x000281EC SQ_ALU_CONST_BUFFER_SIZE_GS_11
+0x000281F0 SQ_ALU_CONST_BUFFER_SIZE_GS_12
+0x000281F4 SQ_ALU_CONST_BUFFER_SIZE_GS_13
+0x000281F8 SQ_ALU_CONST_BUFFER_SIZE_GS_14
+0x000281FC SQ_ALU_CONST_BUFFER_SIZE_GS_15
+0x00028140 SQ_ALU_CONST_BUFFER_SIZE_PS_0
+0x00028144 SQ_ALU_CONST_BUFFER_SIZE_PS_1
+0x00028148 SQ_ALU_CONST_BUFFER_SIZE_PS_2
+0x0002814C SQ_ALU_CONST_BUFFER_SIZE_PS_3
+0x00028150 SQ_ALU_CONST_BUFFER_SIZE_PS_4
+0x00028154 SQ_ALU_CONST_BUFFER_SIZE_PS_5
+0x00028158 SQ_ALU_CONST_BUFFER_SIZE_PS_6
+0x0002815C SQ_ALU_CONST_BUFFER_SIZE_PS_7
+0x00028160 SQ_ALU_CONST_BUFFER_SIZE_PS_8
+0x00028164 SQ_ALU_CONST_BUFFER_SIZE_PS_9
+0x00028168 SQ_ALU_CONST_BUFFER_SIZE_PS_10
+0x0002816C SQ_ALU_CONST_BUFFER_SIZE_PS_11
+0x00028170 SQ_ALU_CONST_BUFFER_SIZE_PS_12
+0x00028174 SQ_ALU_CONST_BUFFER_SIZE_PS_13
+0x00028178 SQ_ALU_CONST_BUFFER_SIZE_PS_14
+0x0002817C SQ_ALU_CONST_BUFFER_SIZE_PS_15
+0x00028180 SQ_ALU_CONST_BUFFER_SIZE_VS_0
+0x00028184 SQ_ALU_CONST_BUFFER_SIZE_VS_1
+0x00028188 SQ_ALU_CONST_BUFFER_SIZE_VS_2
+0x0002818C SQ_ALU_CONST_BUFFER_SIZE_VS_3
+0x00028190 SQ_ALU_CONST_BUFFER_SIZE_VS_4
+0x00028194 SQ_ALU_CONST_BUFFER_SIZE_VS_5
+0x00028198 SQ_ALU_CONST_BUFFER_SIZE_VS_6
+0x0002819C SQ_ALU_CONST_BUFFER_SIZE_VS_7
+0x000281A0 SQ_ALU_CONST_BUFFER_SIZE_VS_8
+0x000281A4 SQ_ALU_CONST_BUFFER_SIZE_VS_9
+0x000281A8 SQ_ALU_CONST_BUFFER_SIZE_VS_10
+0x000281AC SQ_ALU_CONST_BUFFER_SIZE_VS_11
+0x000281B0 SQ_ALU_CONST_BUFFER_SIZE_VS_12
+0x000281B4 SQ_ALU_CONST_BUFFER_SIZE_VS_13
+0x000281B8 SQ_ALU_CONST_BUFFER_SIZE_VS_14
+0x000281BC SQ_ALU_CONST_BUFFER_SIZE_VS_15
+0x000289C0 SQ_ALU_CONST_CACHE_GS_0
+0x000289C4 SQ_ALU_CONST_CACHE_GS_1
+0x000289C8 SQ_ALU_CONST_CACHE_GS_2
+0x000289CC SQ_ALU_CONST_CACHE_GS_3
+0x000289D0 SQ_ALU_CONST_CACHE_GS_4
+0x000289D4 SQ_ALU_CONST_CACHE_GS_5
+0x000289D8 SQ_ALU_CONST_CACHE_GS_6
+0x000289DC SQ_ALU_CONST_CACHE_GS_7
+0x000289E0 SQ_ALU_CONST_CACHE_GS_8
+0x000289E4 SQ_ALU_CONST_CACHE_GS_9
+0x000289E8 SQ_ALU_CONST_CACHE_GS_10
+0x000289EC SQ_ALU_CONST_CACHE_GS_11
+0x000289F0 SQ_ALU_CONST_CACHE_GS_12
+0x000289F4 SQ_ALU_CONST_CACHE_GS_13
+0x000289F8 SQ_ALU_CONST_CACHE_GS_14
+0x000289FC SQ_ALU_CONST_CACHE_GS_15
+0x00028940 SQ_ALU_CONST_CACHE_PS_0
+0x00028944 SQ_ALU_CONST_CACHE_PS_1
+0x00028948 SQ_ALU_CONST_CACHE_PS_2
+0x0002894C SQ_ALU_CONST_CACHE_PS_3
+0x00028950 SQ_ALU_CONST_CACHE_PS_4
+0x00028954 SQ_ALU_CONST_CACHE_PS_5
+0x00028958 SQ_ALU_CONST_CACHE_PS_6
+0x0002895C SQ_ALU_CONST_CACHE_PS_7
+0x00028960 SQ_ALU_CONST_CACHE_PS_8
+0x00028964 SQ_ALU_CONST_CACHE_PS_9
+0x00028968 SQ_ALU_CONST_CACHE_PS_10
+0x0002896C SQ_ALU_CONST_CACHE_PS_11
+0x00028970 SQ_ALU_CONST_CACHE_PS_12
+0x00028974 SQ_ALU_CONST_CACHE_PS_13
+0x00028978 SQ_ALU_CONST_CACHE_PS_14
+0x0002897C SQ_ALU_CONST_CACHE_PS_15
+0x00028980 SQ_ALU_CONST_CACHE_VS_0
+0x00028984 SQ_ALU_CONST_CACHE_VS_1
+0x00028988 SQ_ALU_CONST_CACHE_VS_2
+0x0002898C SQ_ALU_CONST_CACHE_VS_3
+0x00028990 SQ_ALU_CONST_CACHE_VS_4
+0x00028994 SQ_ALU_CONST_CACHE_VS_5
+0x00028998 SQ_ALU_CONST_CACHE_VS_6
+0x0002899C SQ_ALU_CONST_CACHE_VS_7
+0x000289A0 SQ_ALU_CONST_CACHE_VS_8
+0x000289A4 SQ_ALU_CONST_CACHE_VS_9
+0x000289A8 SQ_ALU_CONST_CACHE_VS_10
+0x000289AC SQ_ALU_CONST_CACHE_VS_11
+0x000289B0 SQ_ALU_CONST_CACHE_VS_12
+0x000289B4 SQ_ALU_CONST_CACHE_VS_13
+0x000289B8 SQ_ALU_CONST_CACHE_VS_14
+0x000289BC SQ_ALU_CONST_CACHE_VS_15
+0x000288D8 SQ_PGM_CF_OFFSET_ES
+0x000288DC SQ_PGM_CF_OFFSET_FS
+0x000288D4 SQ_PGM_CF_OFFSET_GS
+0x000288CC SQ_PGM_CF_OFFSET_PS
+0x000288D0 SQ_PGM_CF_OFFSET_VS
+0x00028854 SQ_PGM_EXPORTS_PS
+0x00028890 SQ_PGM_RESOURCES_ES
+0x000288A4 SQ_PGM_RESOURCES_FS
+0x0002887C SQ_PGM_RESOURCES_GS
+0x00028850 SQ_PGM_RESOURCES_PS
+0x00028868 SQ_PGM_RESOURCES_VS
+0x00009100 SPI_CONFIG_CNTL
+0x0000913C SPI_CONFIG_CNTL_1
+0x000286DC SPI_FOG_CNTL
+0x000286E4 SPI_FOG_FUNC_BIAS
+0x000286E0 SPI_FOG_FUNC_SCALE
+0x000286D8 SPI_INPUT_Z
+0x000286D4 SPI_INTERP_CONTROL_0
+0x00028644 SPI_PS_INPUT_CNTL_0
+0x00028648 SPI_PS_INPUT_CNTL_1
+0x0002864C SPI_PS_INPUT_CNTL_2
+0x00028650 SPI_PS_INPUT_CNTL_3
+0x00028654 SPI_PS_INPUT_CNTL_4
+0x00028658 SPI_PS_INPUT_CNTL_5
+0x0002865C SPI_PS_INPUT_CNTL_6
+0x00028660 SPI_PS_INPUT_CNTL_7
+0x00028664 SPI_PS_INPUT_CNTL_8
+0x00028668 SPI_PS_INPUT_CNTL_9
+0x0002866C SPI_PS_INPUT_CNTL_10
+0x00028670 SPI_PS_INPUT_CNTL_11
+0x00028674 SPI_PS_INPUT_CNTL_12
+0x00028678 SPI_PS_INPUT_CNTL_13
+0x0002867C SPI_PS_INPUT_CNTL_14
+0x00028680 SPI_PS_INPUT_CNTL_15
+0x00028684 SPI_PS_INPUT_CNTL_16
+0x00028688 SPI_PS_INPUT_CNTL_17
+0x0002868C SPI_PS_INPUT_CNTL_18
+0x00028690 SPI_PS_INPUT_CNTL_19
+0x00028694 SPI_PS_INPUT_CNTL_20
+0x00028698 SPI_PS_INPUT_CNTL_21
+0x0002869C SPI_PS_INPUT_CNTL_22
+0x000286A0 SPI_PS_INPUT_CNTL_23
+0x000286A4 SPI_PS_INPUT_CNTL_24
+0x000286A8 SPI_PS_INPUT_CNTL_25
+0x000286AC SPI_PS_INPUT_CNTL_26
+0x000286B0 SPI_PS_INPUT_CNTL_27
+0x000286B4 SPI_PS_INPUT_CNTL_28
+0x000286B8 SPI_PS_INPUT_CNTL_29
+0x000286BC SPI_PS_INPUT_CNTL_30
+0x000286C0 SPI_PS_INPUT_CNTL_31
+0x000286CC SPI_PS_IN_CONTROL_0
+0x000286D0 SPI_PS_IN_CONTROL_1
+0x000286C4 SPI_VS_OUT_CONFIG
+0x00028614 SPI_VS_OUT_ID_0
+0x00028618 SPI_VS_OUT_ID_1
+0x0002861C SPI_VS_OUT_ID_2
+0x00028620 SPI_VS_OUT_ID_3
+0x00028624 SPI_VS_OUT_ID_4
+0x00028628 SPI_VS_OUT_ID_5
+0x0002862C SPI_VS_OUT_ID_6
+0x00028630 SPI_VS_OUT_ID_7
+0x00028634 SPI_VS_OUT_ID_8
+0x00028638 SPI_VS_OUT_ID_9
+0x00028438 SX_ALPHA_REF
+0x00028410 SX_ALPHA_TEST_CONTROL
+0x00028350 SX_MISC
+0x0000A020 SMX_DC_CTL0
+0x0000A024 SMX_DC_CTL1
+0x0000A028 SMX_DC_CTL2
+0x00009608 TC_CNTL
+0x00009604 TC_INVALIDATE
+0x00009490 TD_CNTL
+0x00009400 TD_FILTER4
+0x00009404 TD_FILTER4_1
+0x00009408 TD_FILTER4_2
+0x0000940C TD_FILTER4_3
+0x00009410 TD_FILTER4_4
+0x00009414 TD_FILTER4_5
+0x00009418 TD_FILTER4_6
+0x0000941C TD_FILTER4_7
+0x00009420 TD_FILTER4_8
+0x00009424 TD_FILTER4_9
+0x00009428 TD_FILTER4_10
+0x0000942C TD_FILTER4_11
+0x00009430 TD_FILTER4_12
+0x00009434 TD_FILTER4_13
+0x00009438 TD_FILTER4_14
+0x0000943C TD_FILTER4_15
+0x00009440 TD_FILTER4_16
+0x00009444 TD_FILTER4_17
+0x00009448 TD_FILTER4_18
+0x0000944C TD_FILTER4_19
+0x00009450 TD_FILTER4_20
+0x00009454 TD_FILTER4_21
+0x00009458 TD_FILTER4_22
+0x0000945C TD_FILTER4_23
+0x00009460 TD_FILTER4_24
+0x00009464 TD_FILTER4_25
+0x00009468 TD_FILTER4_26
+0x0000946C TD_FILTER4_27
+0x00009470 TD_FILTER4_28
+0x00009474 TD_FILTER4_29
+0x00009478 TD_FILTER4_30
+0x0000947C TD_FILTER4_31
+0x00009480 TD_FILTER4_32
+0x00009484 TD_FILTER4_33
+0x00009488 TD_FILTER4_34
+0x0000948C TD_FILTER4_35
+0x0000A80C TD_GS_SAMPLER0_BORDER_ALPHA
+0x0000A81C TD_GS_SAMPLER1_BORDER_ALPHA
+0x0000A82C TD_GS_SAMPLER2_BORDER_ALPHA
+0x0000A83C TD_GS_SAMPLER3_BORDER_ALPHA
+0x0000A84C TD_GS_SAMPLER4_BORDER_ALPHA
+0x0000A85C TD_GS_SAMPLER5_BORDER_ALPHA
+0x0000A86C TD_GS_SAMPLER6_BORDER_ALPHA
+0x0000A87C TD_GS_SAMPLER7_BORDER_ALPHA
+0x0000A88C TD_GS_SAMPLER8_BORDER_ALPHA
+0x0000A89C TD_GS_SAMPLER9_BORDER_ALPHA
+0x0000A8AC TD_GS_SAMPLER10_BORDER_ALPHA
+0x0000A8BC TD_GS_SAMPLER11_BORDER_ALPHA
+0x0000A8CC TD_GS_SAMPLER12_BORDER_ALPHA
+0x0000A8DC TD_GS_SAMPLER13_BORDER_ALPHA
+0x0000A8EC TD_GS_SAMPLER14_BORDER_ALPHA
+0x0000A8FC TD_GS_SAMPLER15_BORDER_ALPHA
+0x0000A90C TD_GS_SAMPLER16_BORDER_ALPHA
+0x0000A91C TD_GS_SAMPLER17_BORDER_ALPHA
+0x0000A808 TD_GS_SAMPLER0_BORDER_BLUE
+0x0000A818 TD_GS_SAMPLER1_BORDER_BLUE
+0x0000A828 TD_GS_SAMPLER2_BORDER_BLUE
+0x0000A838 TD_GS_SAMPLER3_BORDER_BLUE
+0x0000A848 TD_GS_SAMPLER4_BORDER_BLUE
+0x0000A858 TD_GS_SAMPLER5_BORDER_BLUE
+0x0000A868 TD_GS_SAMPLER6_BORDER_BLUE
+0x0000A878 TD_GS_SAMPLER7_BORDER_BLUE
+0x0000A888 TD_GS_SAMPLER8_BORDER_BLUE
+0x0000A898 TD_GS_SAMPLER9_BORDER_BLUE
+0x0000A8A8 TD_GS_SAMPLER10_BORDER_BLUE
+0x0000A8B8 TD_GS_SAMPLER11_BORDER_BLUE
+0x0000A8C8 TD_GS_SAMPLER12_BORDER_BLUE
+0x0000A8D8 TD_GS_SAMPLER13_BORDER_BLUE
+0x0000A8E8 TD_GS_SAMPLER14_BORDER_BLUE
+0x0000A8F8 TD_GS_SAMPLER15_BORDER_BLUE
+0x0000A908 TD_GS_SAMPLER16_BORDER_BLUE
+0x0000A918 TD_GS_SAMPLER17_BORDER_BLUE
+0x0000A804 TD_GS_SAMPLER0_BORDER_GREEN
+0x0000A814 TD_GS_SAMPLER1_BORDER_GREEN
+0x0000A824 TD_GS_SAMPLER2_BORDER_GREEN
+0x0000A834 TD_GS_SAMPLER3_BORDER_GREEN
+0x0000A844 TD_GS_SAMPLER4_BORDER_GREEN
+0x0000A854 TD_GS_SAMPLER5_BORDER_GREEN
+0x0000A864 TD_GS_SAMPLER6_BORDER_GREEN
+0x0000A874 TD_GS_SAMPLER7_BORDER_GREEN
+0x0000A884 TD_GS_SAMPLER8_BORDER_GREEN
+0x0000A894 TD_GS_SAMPLER9_BORDER_GREEN
+0x0000A8A4 TD_GS_SAMPLER10_BORDER_GREEN
+0x0000A8B4 TD_GS_SAMPLER11_BORDER_GREEN
+0x0000A8C4 TD_GS_SAMPLER12_BORDER_GREEN
+0x0000A8D4 TD_GS_SAMPLER13_BORDER_GREEN
+0x0000A8E4 TD_GS_SAMPLER14_BORDER_GREEN
+0x0000A8F4 TD_GS_SAMPLER15_BORDER_GREEN
+0x0000A904 TD_GS_SAMPLER16_BORDER_GREEN
+0x0000A914 TD_GS_SAMPLER17_BORDER_GREEN
+0x0000A800 TD_GS_SAMPLER0_BORDER_RED
+0x0000A810 TD_GS_SAMPLER1_BORDER_RED
+0x0000A820 TD_GS_SAMPLER2_BORDER_RED
+0x0000A830 TD_GS_SAMPLER3_BORDER_RED
+0x0000A840 TD_GS_SAMPLER4_BORDER_RED
+0x0000A850 TD_GS_SAMPLER5_BORDER_RED
+0x0000A860 TD_GS_SAMPLER6_BORDER_RED
+0x0000A870 TD_GS_SAMPLER7_BORDER_RED
+0x0000A880 TD_GS_SAMPLER8_BORDER_RED
+0x0000A890 TD_GS_SAMPLER9_BORDER_RED
+0x0000A8A0 TD_GS_SAMPLER10_BORDER_RED
+0x0000A8B0 TD_GS_SAMPLER11_BORDER_RED
+0x0000A8C0 TD_GS_SAMPLER12_BORDER_RED
+0x0000A8D0 TD_GS_SAMPLER13_BORDER_RED
+0x0000A8E0 TD_GS_SAMPLER14_BORDER_RED
+0x0000A8F0 TD_GS_SAMPLER15_BORDER_RED
+0x0000A900 TD_GS_SAMPLER16_BORDER_RED
+0x0000A910 TD_GS_SAMPLER17_BORDER_RED
+0x0000A40C TD_PS_SAMPLER0_BORDER_ALPHA
+0x0000A41C TD_PS_SAMPLER1_BORDER_ALPHA
+0x0000A42C TD_PS_SAMPLER2_BORDER_ALPHA
+0x0000A43C TD_PS_SAMPLER3_BORDER_ALPHA
+0x0000A44C TD_PS_SAMPLER4_BORDER_ALPHA
+0x0000A45C TD_PS_SAMPLER5_BORDER_ALPHA
+0x0000A46C TD_PS_SAMPLER6_BORDER_ALPHA
+0x0000A47C TD_PS_SAMPLER7_BORDER_ALPHA
+0x0000A48C TD_PS_SAMPLER8_BORDER_ALPHA
+0x0000A49C TD_PS_SAMPLER9_BORDER_ALPHA
+0x0000A4AC TD_PS_SAMPLER10_BORDER_ALPHA
+0x0000A4BC TD_PS_SAMPLER11_BORDER_ALPHA
+0x0000A4CC TD_PS_SAMPLER12_BORDER_ALPHA
+0x0000A4DC TD_PS_SAMPLER13_BORDER_ALPHA
+0x0000A4EC TD_PS_SAMPLER14_BORDER_ALPHA
+0x0000A4FC TD_PS_SAMPLER15_BORDER_ALPHA
+0x0000A50C TD_PS_SAMPLER16_BORDER_ALPHA
+0x0000A51C TD_PS_SAMPLER17_BORDER_ALPHA
+0x0000A408 TD_PS_SAMPLER0_BORDER_BLUE
+0x0000A418 TD_PS_SAMPLER1_BORDER_BLUE
+0x0000A428 TD_PS_SAMPLER2_BORDER_BLUE
+0x0000A438 TD_PS_SAMPLER3_BORDER_BLUE
+0x0000A448 TD_PS_SAMPLER4_BORDER_BLUE
+0x0000A458 TD_PS_SAMPLER5_BORDER_BLUE
+0x0000A468 TD_PS_SAMPLER6_BORDER_BLUE
+0x0000A478 TD_PS_SAMPLER7_BORDER_BLUE
+0x0000A488 TD_PS_SAMPLER8_BORDER_BLUE
+0x0000A498 TD_PS_SAMPLER9_BORDER_BLUE
+0x0000A4A8 TD_PS_SAMPLER10_BORDER_BLUE
+0x0000A4B8 TD_PS_SAMPLER11_BORDER_BLUE
+0x0000A4C8 TD_PS_SAMPLER12_BORDER_BLUE
+0x0000A4D8 TD_PS_SAMPLER13_BORDER_BLUE
+0x0000A4E8 TD_PS_SAMPLER14_BORDER_BLUE
+0x0000A4F8 TD_PS_SAMPLER15_BORDER_BLUE
+0x0000A508 TD_PS_SAMPLER16_BORDER_BLUE
+0x0000A518 TD_PS_SAMPLER17_BORDER_BLUE
+0x0000A404 TD_PS_SAMPLER0_BORDER_GREEN
+0x0000A414 TD_PS_SAMPLER1_BORDER_GREEN
+0x0000A424 TD_PS_SAMPLER2_BORDER_GREEN
+0x0000A434 TD_PS_SAMPLER3_BORDER_GREEN
+0x0000A444 TD_PS_SAMPLER4_BORDER_GREEN
+0x0000A454 TD_PS_SAMPLER5_BORDER_GREEN
+0x0000A464 TD_PS_SAMPLER6_BORDER_GREEN
+0x0000A474 TD_PS_SAMPLER7_BORDER_GREEN
+0x0000A484 TD_PS_SAMPLER8_BORDER_GREEN
+0x0000A494 TD_PS_SAMPLER9_BORDER_GREEN
+0x0000A4A4 TD_PS_SAMPLER10_BORDER_GREEN
+0x0000A4B4 TD_PS_SAMPLER11_BORDER_GREEN
+0x0000A4C4 TD_PS_SAMPLER12_BORDER_GREEN
+0x0000A4D4 TD_PS_SAMPLER13_BORDER_GREEN
+0x0000A4E4 TD_PS_SAMPLER14_BORDER_GREEN
+0x0000A4F4 TD_PS_SAMPLER15_BORDER_GREEN
+0x0000A504 TD_PS_SAMPLER16_BORDER_GREEN
+0x0000A514 TD_PS_SAMPLER17_BORDER_GREEN
+0x0000A400 TD_PS_SAMPLER0_BORDER_RED
+0x0000A410 TD_PS_SAMPLER1_BORDER_RED
+0x0000A420 TD_PS_SAMPLER2_BORDER_RED
+0x0000A430 TD_PS_SAMPLER3_BORDER_RED
+0x0000A440 TD_PS_SAMPLER4_BORDER_RED
+0x0000A450 TD_PS_SAMPLER5_BORDER_RED
+0x0000A460 TD_PS_SAMPLER6_BORDER_RED
+0x0000A470 TD_PS_SAMPLER7_BORDER_RED
+0x0000A480 TD_PS_SAMPLER8_BORDER_RED
+0x0000A490 TD_PS_SAMPLER9_BORDER_RED
+0x0000A4A0 TD_PS_SAMPLER10_BORDER_RED
+0x0000A4B0 TD_PS_SAMPLER11_BORDER_RED
+0x0000A4C0 TD_PS_SAMPLER12_BORDER_RED
+0x0000A4D0 TD_PS_SAMPLER13_BORDER_RED
+0x0000A4E0 TD_PS_SAMPLER14_BORDER_RED
+0x0000A4F0 TD_PS_SAMPLER15_BORDER_RED
+0x0000A500 TD_PS_SAMPLER16_BORDER_RED
+0x0000A510 TD_PS_SAMPLER17_BORDER_RED
+0x0000AA00 TD_PS_SAMPLER0_CLEARTYPE_KERNEL
+0x0000AA04 TD_PS_SAMPLER1_CLEARTYPE_KERNEL
+0x0000AA08 TD_PS_SAMPLER2_CLEARTYPE_KERNEL
+0x0000AA0C TD_PS_SAMPLER3_CLEARTYPE_KERNEL
+0x0000AA10 TD_PS_SAMPLER4_CLEARTYPE_KERNEL
+0x0000AA14 TD_PS_SAMPLER5_CLEARTYPE_KERNEL
+0x0000AA18 TD_PS_SAMPLER6_CLEARTYPE_KERNEL
+0x0000AA1C TD_PS_SAMPLER7_CLEARTYPE_KERNEL
+0x0000AA20 TD_PS_SAMPLER8_CLEARTYPE_KERNEL
+0x0000AA24 TD_PS_SAMPLER9_CLEARTYPE_KERNEL
+0x0000AA28 TD_PS_SAMPLER10_CLEARTYPE_KERNEL
+0x0000AA2C TD_PS_SAMPLER11_CLEARTYPE_KERNEL
+0x0000AA30 TD_PS_SAMPLER12_CLEARTYPE_KERNEL
+0x0000AA34 TD_PS_SAMPLER13_CLEARTYPE_KERNEL
+0x0000AA38 TD_PS_SAMPLER14_CLEARTYPE_KERNEL
+0x0000AA3C TD_PS_SAMPLER15_CLEARTYPE_KERNEL
+0x0000AA40 TD_PS_SAMPLER16_CLEARTYPE_KERNEL
+0x0000AA44 TD_PS_SAMPLER17_CLEARTYPE_KERNEL
+0x0000A60C TD_VS_SAMPLER0_BORDER_ALPHA
+0x0000A61C TD_VS_SAMPLER1_BORDER_ALPHA
+0x0000A62C TD_VS_SAMPLER2_BORDER_ALPHA
+0x0000A63C TD_VS_SAMPLER3_BORDER_ALPHA
+0x0000A64C TD_VS_SAMPLER4_BORDER_ALPHA
+0x0000A65C TD_VS_SAMPLER5_BORDER_ALPHA
+0x0000A66C TD_VS_SAMPLER6_BORDER_ALPHA
+0x0000A67C TD_VS_SAMPLER7_BORDER_ALPHA
+0x0000A68C TD_VS_SAMPLER8_BORDER_ALPHA
+0x0000A69C TD_VS_SAMPLER9_BORDER_ALPHA
+0x0000A6AC TD_VS_SAMPLER10_BORDER_ALPHA
+0x0000A6BC TD_VS_SAMPLER11_BORDER_ALPHA
+0x0000A6CC TD_VS_SAMPLER12_BORDER_ALPHA
+0x0000A6DC TD_VS_SAMPLER13_BORDER_ALPHA
+0x0000A6EC TD_VS_SAMPLER14_BORDER_ALPHA
+0x0000A6FC TD_VS_SAMPLER15_BORDER_ALPHA
+0x0000A70C TD_VS_SAMPLER16_BORDER_ALPHA
+0x0000A71C TD_VS_SAMPLER17_BORDER_ALPHA
+0x0000A608 TD_VS_SAMPLER0_BORDER_BLUE
+0x0000A618 TD_VS_SAMPLER1_BORDER_BLUE
+0x0000A628 TD_VS_SAMPLER2_BORDER_BLUE
+0x0000A638 TD_VS_SAMPLER3_BORDER_BLUE
+0x0000A648 TD_VS_SAMPLER4_BORDER_BLUE
+0x0000A658 TD_VS_SAMPLER5_BORDER_BLUE
+0x0000A668 TD_VS_SAMPLER6_BORDER_BLUE
+0x0000A678 TD_VS_SAMPLER7_BORDER_BLUE
+0x0000A688 TD_VS_SAMPLER8_BORDER_BLUE
+0x0000A698 TD_VS_SAMPLER9_BORDER_BLUE
+0x0000A6A8 TD_VS_SAMPLER10_BORDER_BLUE
+0x0000A6B8 TD_VS_SAMPLER11_BORDER_BLUE
+0x0000A6C8 TD_VS_SAMPLER12_BORDER_BLUE
+0x0000A6D8 TD_VS_SAMPLER13_BORDER_BLUE
+0x0000A6E8 TD_VS_SAMPLER14_BORDER_BLUE
+0x0000A6F8 TD_VS_SAMPLER15_BORDER_BLUE
+0x0000A708 TD_VS_SAMPLER16_BORDER_BLUE
+0x0000A718 TD_VS_SAMPLER17_BORDER_BLUE
+0x0000A604 TD_VS_SAMPLER0_BORDER_GREEN
+0x0000A614 TD_VS_SAMPLER1_BORDER_GREEN
+0x0000A624 TD_VS_SAMPLER2_BORDER_GREEN
+0x0000A634 TD_VS_SAMPLER3_BORDER_GREEN
+0x0000A644 TD_VS_SAMPLER4_BORDER_GREEN
+0x0000A654 TD_VS_SAMPLER5_BORDER_GREEN
+0x0000A664 TD_VS_SAMPLER6_BORDER_GREEN
+0x0000A674 TD_VS_SAMPLER7_BORDER_GREEN
+0x0000A684 TD_VS_SAMPLER8_BORDER_GREEN
+0x0000A694 TD_VS_SAMPLER9_BORDER_GREEN
+0x0000A6A4 TD_VS_SAMPLER10_BORDER_GREEN
+0x0000A6B4 TD_VS_SAMPLER11_BORDER_GREEN
+0x0000A6C4 TD_VS_SAMPLER12_BORDER_GREEN
+0x0000A6D4 TD_VS_SAMPLER13_BORDER_GREEN
+0x0000A6E4 TD_VS_SAMPLER14_BORDER_GREEN
+0x0000A6F4 TD_VS_SAMPLER15_BORDER_GREEN
+0x0000A704 TD_VS_SAMPLER16_BORDER_GREEN
+0x0000A714 TD_VS_SAMPLER17_BORDER_GREEN
+0x0000A600 TD_VS_SAMPLER0_BORDER_RED
+0x0000A610 TD_VS_SAMPLER1_BORDER_RED
+0x0000A620 TD_VS_SAMPLER2_BORDER_RED
+0x0000A630 TD_VS_SAMPLER3_BORDER_RED
+0x0000A640 TD_VS_SAMPLER4_BORDER_RED
+0x0000A650 TD_VS_SAMPLER5_BORDER_RED
+0x0000A660 TD_VS_SAMPLER6_BORDER_RED
+0x0000A670 TD_VS_SAMPLER7_BORDER_RED
+0x0000A680 TD_VS_SAMPLER8_BORDER_RED
+0x0000A690 TD_VS_SAMPLER9_BORDER_RED
+0x0000A6A0 TD_VS_SAMPLER10_BORDER_RED
+0x0000A6B0 TD_VS_SAMPLER11_BORDER_RED
+0x0000A6C0 TD_VS_SAMPLER12_BORDER_RED
+0x0000A6D0 TD_VS_SAMPLER13_BORDER_RED
+0x0000A6E0 TD_VS_SAMPLER14_BORDER_RED
+0x0000A6F0 TD_VS_SAMPLER15_BORDER_RED
+0x0000A700 TD_VS_SAMPLER16_BORDER_RED
+0x0000A710 TD_VS_SAMPLER17_BORDER_RED
+0x00009508 TA_CNTL_AUX
+0x0002802C DB_DEPTH_CLEAR
+0x00028D24 DB_HTILE_SURFACE
+0x00028D34 DB_PREFETCH_LIMIT
+0x00028D30 DB_PRELOAD_CONTROL
+0x00028D0C DB_RENDER_CONTROL
+0x00028D10 DB_RENDER_OVERRIDE
+0x0002880C DB_SHADER_CONTROL
+0x00028D2C DB_SRESULTS_COMPARE_STATE1
+0x00028430 DB_STENCILREFMASK
+0x00028434 DB_STENCILREFMASK_BF
+0x00028028 DB_STENCIL_CLEAR
+0x00028780 CB_BLEND0_CONTROL
+0x00028784 CB_BLEND1_CONTROL
+0x00028788 CB_BLEND2_CONTROL
+0x0002878C CB_BLEND3_CONTROL
+0x00028790 CB_BLEND4_CONTROL
+0x00028794 CB_BLEND5_CONTROL
+0x00028798 CB_BLEND6_CONTROL
+0x0002879C CB_BLEND7_CONTROL
+0x00028804 CB_BLEND_CONTROL
+0x00028420 CB_BLEND_ALPHA
+0x0002841C CB_BLEND_BLUE
+0x00028418 CB_BLEND_GREEN
+0x00028414 CB_BLEND_RED
+0x0002812C CB_CLEAR_ALPHA
+0x00028128 CB_CLEAR_BLUE
+0x00028124 CB_CLEAR_GREEN
+0x00028120 CB_CLEAR_RED
+0x00028C30 CB_CLRCMP_CONTROL
+0x00028C38 CB_CLRCMP_DST
+0x00028C3C CB_CLRCMP_MSK
+0x00028C34 CB_CLRCMP_SRC
+0x00028100 CB_COLOR0_MASK
+0x00028104 CB_COLOR1_MASK
+0x00028108 CB_COLOR2_MASK
+0x0002810C CB_COLOR3_MASK
+0x00028110 CB_COLOR4_MASK
+0x00028114 CB_COLOR5_MASK
+0x00028118 CB_COLOR6_MASK
+0x0002811C CB_COLOR7_MASK
+0x00028080 CB_COLOR0_VIEW
+0x00028084 CB_COLOR1_VIEW
+0x00028088 CB_COLOR2_VIEW
+0x0002808C CB_COLOR3_VIEW
+0x00028090 CB_COLOR4_VIEW
+0x00028094 CB_COLOR5_VIEW
+0x00028098 CB_COLOR6_VIEW
+0x0002809C CB_COLOR7_VIEW
+0x00028808 CB_COLOR_CONTROL
+0x0002842C CB_FOG_BLUE
+0x00028428 CB_FOG_GREEN
+0x00028424 CB_FOG_RED
+0x00008040 WAIT_UNTIL
+0x00008950 CC_GC_SHADER_PIPE_CONFIG
+0x00008954 GC_USER_SHADER_PIPE_CONFIG
+0x00009714 VC_ENHANCE
+0x00009830 DB_DEBUG
+0x00009838 DB_WATERMARKS
+0x00028D28 DB_SRESULTS_COMPARE_STATE0
+0x00028D44 DB_ALPHA_TO_MASK
+0x00009504 TA_CNTL
+0x00009700 VC_CNTL
+0x00009718 VC_CONFIG
+0x0000A02C SMX_DC_MC_INTF_CTL
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 287fceb..626d518 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -113,6 +113,7 @@
 	uint32_t size_reg;
 	uint32_t tmp;
 
+	radeon_gart_restore(rdev);
 	tmp = RREG32_MC(RS690_AIC_CTRL_SCRATCH);
 	tmp |= RS690_DIS_OUT_OF_PCI_GART_ACCESS;
 	WREG32_MC(RS690_AIC_CTRL_SCRATCH, tmp);
@@ -150,9 +151,8 @@
 		WREG32(RADEON_AGP_BASE, 0xFFFFFFFF);
 		WREG32(RS480_AGP_BASE_2, 0);
 	}
-	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
-	tmp = REG_SET(RS690_MC_AGP_TOP, tmp >> 16);
-	tmp |= REG_SET(RS690_MC_AGP_START, rdev->mc.gtt_location >> 16);
+	tmp = REG_SET(RS690_MC_AGP_TOP, rdev->mc.gtt_end >> 16);
+	tmp |= REG_SET(RS690_MC_AGP_START, rdev->mc.gtt_start >> 16);
 	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740)) {
 		WREG32_MC(RS690_MCCFG_AGP_LOCATION, tmp);
 		tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS;
@@ -251,14 +251,19 @@
 	}
 }
 
-void rs400_vram_info(struct radeon_device *rdev)
+void rs400_mc_init(struct radeon_device *rdev)
 {
+	u64 base;
+
 	rs400_gart_adjust_size(rdev);
+	rdev->mc.igp_sideport_enabled = radeon_combios_sideport_present(rdev);
 	/* DDR for all card after R300 & IGP */
 	rdev->mc.vram_is_ddr = true;
 	rdev->mc.vram_width = 128;
-
 	r100_vram_init_sizes(rdev);
+	base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
+	radeon_vram_location(rdev, &rdev->mc, base);
+	radeon_gtt_location(rdev, &rdev->mc);
 }
 
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg)
@@ -362,22 +367,6 @@
 #endif
 }
 
-static int rs400_mc_init(struct radeon_device *rdev)
-{
-	int r;
-	u32 tmp;
-
-	/* Setup GPU memory space */
-	tmp = RREG32(R_00015C_NB_TOM);
-	rdev->mc.vram_location = G_00015C_MC_FB_START(tmp) << 16;
-	rdev->mc.gtt_location = 0xFFFFFFFFUL;
-	r = radeon_mc_setup(rdev);
-	rdev->mc.igp_sideport_enabled = radeon_combios_sideport_present(rdev);
-	if (r)
-		return r;
-	return 0;
-}
-
 void rs400_mc_program(struct radeon_device *rdev)
 {
 	struct r100_mc_save save;
@@ -516,12 +505,8 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	rs400_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = rs400_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize memory controller */
+	rs400_mc_init(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
 	if (r)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index c381856..47f046b 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -45,23 +45,6 @@
 void rs600_gpu_init(struct radeon_device *rdev);
 int rs600_mc_wait_for_idle(struct radeon_device *rdev);
 
-int rs600_mc_init(struct radeon_device *rdev)
-{
-	/* read back the MC value from the hw */
-	int r;
-	u32 tmp;
-
-	/* Setup GPU memory space */
-	tmp = RREG32_MC(R_000004_MC_FB_LOCATION);
-	rdev->mc.vram_location = G_000004_MC_FB_START(tmp) << 16;
-	rdev->mc.gtt_location = 0xffffffffUL;
-	r = radeon_mc_setup(rdev);
-	rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
-	if (r)
-		return r;
-	return 0;
-}
-
 /* hpd for digital panel detect/disconnect */
 bool rs600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 {
@@ -213,6 +196,7 @@
 	r = radeon_gart_table_vram_pin(rdev);
 	if (r)
 		return r;
+	radeon_gart_restore(rdev);
 	/* Enable bus master */
 	tmp = RREG32(R_00004C_BUS_CNTL) & C_00004C_BUS_MASTER_DIS;
 	WREG32(R_00004C_BUS_CNTL, tmp);
@@ -406,10 +390,14 @@
 		if (G_000044_SW_INT(status))
 			radeon_fence_process(rdev);
 		/* Vertical blank interrupts */
-		if (G_007EDC_LB_D1_VBLANK_INTERRUPT(r500_disp_int))
+		if (G_007EDC_LB_D1_VBLANK_INTERRUPT(r500_disp_int)) {
 			drm_handle_vblank(rdev->ddev, 0);
-		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(r500_disp_int))
+			wake_up(&rdev->irq.vblank_queue);
+		}
+		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(r500_disp_int)) {
 			drm_handle_vblank(rdev->ddev, 1);
+			wake_up(&rdev->irq.vblank_queue);
+		}
 		if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(r500_disp_int)) {
 			queue_hotplug = true;
 			DRM_DEBUG("HPD1\n");
@@ -470,22 +458,22 @@
 		dev_warn(rdev->dev, "Wait MC idle timeout before updating MC.\n");
 }
 
-void rs600_vram_info(struct radeon_device *rdev)
+void rs600_mc_init(struct radeon_device *rdev)
 {
-	rdev->mc.vram_is_ddr = true;
-	rdev->mc.vram_width = 128;
-
-	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
-	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
+	u64 base;
 
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-
-	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
-		rdev->mc.mc_vram_size = rdev->mc.aper_size;
-
-	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
-		rdev->mc.real_vram_size = rdev->mc.aper_size;
+	rdev->mc.vram_is_ddr = true;
+	rdev->mc.vram_width = 128;
+	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
+	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
+	base = RREG32_MC(R_000004_MC_FB_LOCATION);
+	base = G_000004_MC_FB_START(base) << 16;
+	radeon_vram_location(rdev, &rdev->mc, base);
+	radeon_gtt_location(rdev, &rdev->mc);
 }
 
 void rs600_bandwidth_update(struct radeon_device *rdev)
@@ -661,12 +649,8 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	rs600_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = rs600_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize memory controller */
+	rs600_mc_init(rdev);
 	rs600_debugfs(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 06e2771..83b9174 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -129,27 +129,21 @@
 	rdev->pm.sideport_bandwidth.full = rfixed_div(rdev->pm.sideport_bandwidth, tmp);
 }
 
-void rs690_vram_info(struct radeon_device *rdev)
+void rs690_mc_init(struct radeon_device *rdev)
 {
 	fixed20_12 a;
+	u64 base;
 
 	rs400_gart_adjust_size(rdev);
-
 	rdev->mc.vram_is_ddr = true;
 	rdev->mc.vram_width = 128;
-
 	rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
 	rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
-
 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
-
-	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
-		rdev->mc.mc_vram_size = rdev->mc.aper_size;
-
-	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
-		rdev->mc.real_vram_size = rdev->mc.aper_size;
-
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
+	base = G_000100_MC_FB_START(base) << 16;
 	rs690_pm_info(rdev);
 	/* FIXME: we should enforce default clock in case GPU is not in
 	 * default setup
@@ -160,22 +154,9 @@
 	a.full = rfixed_const(16);
 	/* core_bandwidth = sclk(Mhz) * 16 */
 	rdev->pm.core_bandwidth.full = rfixed_div(rdev->pm.sclk, a);
-}
-
-static int rs690_mc_init(struct radeon_device *rdev)
-{
-	int r;
-	u32 tmp;
-
-	/* Setup GPU memory space */
-	tmp = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
-	rdev->mc.vram_location = G_000100_MC_FB_START(tmp) << 16;
-	rdev->mc.gtt_location = 0xFFFFFFFFUL;
-	r = radeon_mc_setup(rdev);
 	rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
-	if (r)
-		return r;
-	return 0;
+	radeon_vram_location(rdev, &rdev->mc, base);
+	radeon_gtt_location(rdev, &rdev->mc);
 }
 
 void rs690_line_buffer_adjust(struct radeon_device *rdev,
@@ -728,12 +709,8 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	rs690_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = rs690_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize memory controller */
+	rs690_mc_init(rdev);
 	rv515_debugfs(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 0e1e6b8..bea747d 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -277,13 +277,15 @@
 	}
 }
 
-void rv515_vram_info(struct radeon_device *rdev)
+void rv515_mc_init(struct radeon_device *rdev)
 {
 	fixed20_12 a;
 
 	rv515_vram_get_type(rdev);
-
 	r100_vram_init_sizes(rdev);
+	radeon_vram_location(rdev, &rdev->mc, 0);
+	if (!(rdev->flags & RADEON_IS_AGP))
+		radeon_gtt_location(rdev, &rdev->mc);
 	/* FIXME: we should enforce default clock in case GPU is not in
 	 * default setup
 	 */
@@ -587,12 +589,15 @@
 	radeon_get_clock_info(rdev->ddev);
 	/* Initialize power management */
 	radeon_pm_init(rdev);
-	/* Get vram informations */
-	rv515_vram_info(rdev);
-	/* Initialize memory controller (also test AGP) */
-	r = r420_mc_init(rdev);
-	if (r)
-		return r;
+	/* initialize AGP */
+	if (rdev->flags & RADEON_IS_AGP) {
+		r = radeon_agp_init(rdev);
+		if (r) {
+			radeon_agp_disable(rdev);
+		}
+	}
+	/* initialize memory controller */
+	rv515_mc_init(rdev);
 	rv515_debugfs(rdev);
 	/* Fence driver */
 	r = radeon_fence_driver_init(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 0302167..37887de 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -56,6 +56,7 @@
 	r = radeon_gart_table_vram_pin(rdev);
 	if (r)
 		return r;
+	radeon_gart_restore(rdev);
 	/* Setup L2 cache */
 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
 				ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
@@ -273,9 +274,10 @@
 /*
  * Core functions
  */
-static u32 r700_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
-						u32 num_backends,
-						u32 backend_disable_mask)
+static u32 r700_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
+					     u32 num_tile_pipes,
+					     u32 num_backends,
+					     u32 backend_disable_mask)
 {
 	u32 backend_map = 0;
 	u32 enabled_backends_mask;
@@ -284,6 +286,7 @@
 	u32 swizzle_pipe[R7XX_MAX_PIPES];
 	u32 cur_backend;
 	u32 i;
+	bool force_no_swizzle;
 
 	if (num_tile_pipes > R7XX_MAX_PIPES)
 		num_tile_pipes = R7XX_MAX_PIPES;
@@ -313,6 +316,18 @@
 	if (enabled_backends_count != num_backends)
 		num_backends = enabled_backends_count;
 
+	switch (rdev->family) {
+	case CHIP_RV770:
+	case CHIP_RV730:
+		force_no_swizzle = false;
+		break;
+	case CHIP_RV710:
+	case CHIP_RV740:
+	default:
+		force_no_swizzle = true;
+		break;
+	}
+
 	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * R7XX_MAX_PIPES);
 	switch (num_tile_pipes) {
 	case 1:
@@ -323,49 +338,100 @@
 		swizzle_pipe[1] = 1;
 		break;
 	case 3:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 1;
+		}
 		break;
 	case 4:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 3;
-		swizzle_pipe[3] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 3;
+			swizzle_pipe[3] = 1;
+		}
 		break;
 	case 5:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 1;
-		swizzle_pipe[4] = 3;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 1;
+			swizzle_pipe[4] = 3;
+		}
 		break;
 	case 6:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 5;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 5;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+		}
 		break;
 	case 7:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 6;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
-		swizzle_pipe[6] = 5;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+			swizzle_pipe[6] = 6;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 6;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+			swizzle_pipe[6] = 5;
+		}
 		break;
 	case 8:
-		swizzle_pipe[0] = 0;
-		swizzle_pipe[1] = 2;
-		swizzle_pipe[2] = 4;
-		swizzle_pipe[3] = 6;
-		swizzle_pipe[4] = 3;
-		swizzle_pipe[5] = 1;
-		swizzle_pipe[6] = 7;
-		swizzle_pipe[7] = 5;
+		if (force_no_swizzle) {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 1;
+			swizzle_pipe[2] = 2;
+			swizzle_pipe[3] = 3;
+			swizzle_pipe[4] = 4;
+			swizzle_pipe[5] = 5;
+			swizzle_pipe[6] = 6;
+			swizzle_pipe[7] = 7;
+		} else {
+			swizzle_pipe[0] = 0;
+			swizzle_pipe[1] = 2;
+			swizzle_pipe[2] = 4;
+			swizzle_pipe[3] = 6;
+			swizzle_pipe[4] = 3;
+			swizzle_pipe[5] = 1;
+			swizzle_pipe[6] = 7;
+			swizzle_pipe[7] = 5;
+		}
 		break;
 	}
 
@@ -385,8 +451,10 @@
 static void rv770_gpu_init(struct radeon_device *rdev)
 {
 	int i, j, num_qd_pipes;
+	u32 ta_aux_cntl;
 	u32 sx_debug_1;
 	u32 smx_dc_ctl0;
+	u32 db_debug3;
 	u32 num_gs_verts_per_thread;
 	u32 vgt_gs_per_es;
 	u32 gs_prim_buffer_depth = 0;
@@ -515,6 +583,7 @@
 
 	switch (rdev->config.rv770.max_tile_pipes) {
 	case 1:
+	default:
 		gb_tiling_config |= PIPE_TILING(0);
 		break;
 	case 2:
@@ -526,16 +595,17 @@
 	case 8:
 		gb_tiling_config |= PIPE_TILING(3);
 		break;
-	default:
-		break;
 	}
+	rdev->config.rv770.tiling_npipes = rdev->config.rv770.max_tile_pipes;
 
 	if (rdev->family == CHIP_RV770)
 		gb_tiling_config |= BANK_TILING(1);
 	else
 		gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
+	rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
 
 	gb_tiling_config |= GROUP_SIZE(0);
+	rdev->config.rv770.tiling_group_size = 256;
 
 	if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) {
 		gb_tiling_config |= ROW_TILING(3);
@@ -549,21 +619,27 @@
 
 	gb_tiling_config |= BANK_SWAPS(1);
 
-	if (rdev->family == CHIP_RV740)
-		backend_map = 0x28;
-	else
-		backend_map = r700_get_tile_pipe_to_backend_map(rdev->config.rv770.max_tile_pipes,
-								rdev->config.rv770.max_backends,
-								(0xff << rdev->config.rv770.max_backends) & 0xff);
-	gb_tiling_config |= BACKEND_MAP(backend_map);
+	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000;
+	cc_rb_backend_disable |=
+		BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
 
-	cc_gc_shader_pipe_config =
+	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffffff00;
+	cc_gc_shader_pipe_config |=
 		INACTIVE_QD_PIPES((R7XX_MAX_PIPES_MASK << rdev->config.rv770.max_pipes) & R7XX_MAX_PIPES_MASK);
 	cc_gc_shader_pipe_config |=
 		INACTIVE_SIMDS((R7XX_MAX_SIMDS_MASK << rdev->config.rv770.max_simds) & R7XX_MAX_SIMDS_MASK);
 
-	cc_rb_backend_disable =
-		BACKEND_DISABLE((R7XX_MAX_BACKENDS_MASK << rdev->config.rv770.max_backends) & R7XX_MAX_BACKENDS_MASK);
+	if (rdev->family == CHIP_RV740)
+		backend_map = 0x28;
+	else
+		backend_map = r700_get_tile_pipe_to_backend_map(rdev,
+								rdev->config.rv770.max_tile_pipes,
+								(R7XX_MAX_BACKENDS -
+								 r600_count_pipe_bits((cc_rb_backend_disable &
+										       R7XX_MAX_BACKENDS_MASK) >> 16)),
+								(cc_rb_backend_disable >> 16));
+	gb_tiling_config |= BACKEND_MAP(backend_map);
+
 
 	WREG32(GB_TILING_CONFIG, gb_tiling_config);
 	WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
@@ -571,16 +647,13 @@
 
 	WREG32(CC_RB_BACKEND_DISABLE,      cc_rb_backend_disable);
 	WREG32(CC_GC_SHADER_PIPE_CONFIG,   cc_gc_shader_pipe_config);
-	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
+	WREG32(CC_SYS_RB_BACKEND_DISABLE,  cc_rb_backend_disable);
 
-	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
 	WREG32(CGTS_SYS_TCC_DISABLE, 0);
 	WREG32(CGTS_TCC_DISABLE, 0);
-	WREG32(CGTS_USER_SYS_TCC_DISABLE, 0);
-	WREG32(CGTS_USER_TCC_DISABLE, 0);
 
 	num_qd_pipes =
-		R7XX_MAX_BACKENDS - r600_count_pipe_bits(cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK);
+		R7XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
 	WREG32(VGT_OUT_DEALLOC_CNTL, (num_qd_pipes * 4) & DEALLOC_DIST_MASK);
 	WREG32(VGT_VERTEX_REUSE_BLOCK_CNTL, ((num_qd_pipes * 4) - 2) & VTX_REUSE_DEPTH_MASK);
 
@@ -590,10 +663,8 @@
 
 	WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30));
 
-	WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO |
-			     SYNC_GRADIENT |
-			     SYNC_WALKER |
-			     SYNC_ALIGNER));
+	ta_aux_cntl = RREG32(TA_CNTL_AUX);
+	WREG32(TA_CNTL_AUX, ta_aux_cntl | DISABLE_CUBE_ANISO);
 
 	sx_debug_1 = RREG32(SX_DEBUG_1);
 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
@@ -604,14 +675,28 @@
 	smx_dc_ctl0 |= CACHE_DEPTH((rdev->config.rv770.sx_num_of_sets * 64) - 1);
 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
 
-	WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
-			       GS_FLUSH_CTL(4) |
-			       ACK_FLUSH_CTL(3) |
-			       SYNC_FLUSH_CTL));
+	if (rdev->family != CHIP_RV740)
+		WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) |
+				       GS_FLUSH_CTL(4) |
+				       ACK_FLUSH_CTL(3) |
+				       SYNC_FLUSH_CTL));
 
-	if (rdev->family == CHIP_RV770)
-		WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f));
-	else {
+	db_debug3 = RREG32(DB_DEBUG3);
+	db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
+	switch (rdev->family) {
+	case CHIP_RV770:
+	case CHIP_RV740:
+		db_debug3 |= DB_CLK_OFF_DELAY(0x1f);
+		break;
+	case CHIP_RV710:
+	case CHIP_RV730:
+	default:
+		db_debug3 |= DB_CLK_OFF_DELAY(2);
+		break;
+	}
+	WREG32(DB_DEBUG3, db_debug3);
+
+	if (rdev->family != CHIP_RV770) {
 		db_debug4 = RREG32(DB_DEBUG4);
 		db_debug4 |= DISABLE_TILE_COVERED_FOR_PS_ITER;
 		WREG32(DB_DEBUG4, db_debug4);
@@ -640,10 +725,10 @@
 			    ALU_UPDATE_FIFO_HIWATER(0x8));
 	switch (rdev->family) {
 	case CHIP_RV770:
-		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
-		break;
 	case CHIP_RV730:
 	case CHIP_RV710:
+		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x1);
+		break;
 	case CHIP_RV740:
 	default:
 		sq_ms_fifo_sizes |= FETCH_FIFO_HIWATER(0x4);
@@ -816,45 +901,13 @@
 	/* Setup GPU memory space */
 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
-
-	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
+	rdev->mc.visible_vram_size = rdev->mc.aper_size;
+	/* FIXME remove this once we support unmappable VRAM */
+	if (rdev->mc.mc_vram_size > rdev->mc.aper_size) {
 		rdev->mc.mc_vram_size = rdev->mc.aper_size;
-
-	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
 		rdev->mc.real_vram_size = rdev->mc.aper_size;
-
-	if (rdev->flags & RADEON_IS_AGP) {
-		/* gtt_size is setup by radeon_agp_init */
-		rdev->mc.gtt_location = rdev->mc.agp_base;
-		tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size;
-		/* Try to put vram before or after AGP because we
-		 * we want SYSTEM_APERTURE to cover both VRAM and
-		 * AGP so that GPU can catch out of VRAM/AGP access
-		 */
-		if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) {
-			/* Enough place before */
-			rdev->mc.vram_location = rdev->mc.gtt_location -
-							rdev->mc.mc_vram_size;
-		} else if (tmp > rdev->mc.mc_vram_size) {
-			/* Enough place after */
-			rdev->mc.vram_location = rdev->mc.gtt_location +
-							rdev->mc.gtt_size;
-		} else {
-			/* Try to setup VRAM then AGP might not
-			 * not work on some card
-			 */
-			rdev->mc.vram_location = 0x00000000UL;
-			rdev->mc.gtt_location = rdev->mc.mc_vram_size;
-		}
-	} else {
-		rdev->mc.vram_location = 0x00000000UL;
-		rdev->mc.gtt_location = rdev->mc.mc_vram_size;
-		rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
 	}
-	rdev->mc.vram_start = rdev->mc.vram_location;
-	rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
-	rdev->mc.gtt_start = rdev->mc.gtt_location;
-	rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
+	r600_vram_gtt_location(rdev, &rdev->mc);
 	/* FIXME: we should enforce default clock in case GPU is not in
 	 * default setup
 	 */
@@ -863,6 +916,7 @@
 	rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a);
 	return 0;
 }
+
 int rv770_gpu_reset(struct radeon_device *rdev)
 {
 	/* FIXME: implement any rv770 specific bits */
@@ -1038,6 +1092,7 @@
 	r = radeon_fence_driver_init(rdev);
 	if (r)
 		return r;
+	/* initialize AGP */
 	if (rdev->flags & RADEON_IS_AGP) {
 		r = radeon_agp_init(rdev);
 		if (r)
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index a1367ab..9506f8c 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -343,4 +343,6 @@
 
 #define	WAIT_UNTIL					0x8040
 
+#define	SRBM_STATUS				        0x0E50
+
 #endif
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 3d47a2c..a759170 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -480,7 +480,7 @@
 	void *from_virtual;
 	void *to_virtual;
 	int i;
-	int ret;
+	int ret = -ENOMEM;
 
 	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
 		ret = ttm_tt_set_user(ttm, ttm->tsk, ttm->start,
@@ -499,8 +499,10 @@
 
 	for (i = 0; i < ttm->num_pages; ++i) {
 		from_page = read_mapping_page(swap_space, i, NULL);
-		if (IS_ERR(from_page))
+		if (IS_ERR(from_page)) {
+			ret = PTR_ERR(from_page);
 			goto out_err;
+		}
 		to_page = __ttm_tt_get_page(ttm, i);
 		if (unlikely(to_page == NULL))
 			goto out_err;
@@ -523,7 +525,7 @@
 	return 0;
 out_err:
 	ttm_tt_free_alloced_pages(ttm);
-	return -ENOMEM;
+	return ret;
 }
 
 int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
@@ -535,6 +537,7 @@
 	void *from_virtual;
 	void *to_virtual;
 	int i;
+	int ret = -ENOMEM;
 
 	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
 	BUG_ON(ttm->caching_state != tt_cached);
@@ -557,7 +560,7 @@
 						0);
 		if (unlikely(IS_ERR(swap_storage))) {
 			printk(KERN_ERR "Failed allocating swap storage.\n");
-			return -ENOMEM;
+			return PTR_ERR(swap_storage);
 		}
 	} else
 		swap_storage = persistant_swap_storage;
@@ -569,9 +572,10 @@
 		if (unlikely(from_page == NULL))
 			continue;
 		to_page = read_mapping_page(swap_space, i, NULL);
-		if (unlikely(to_page == NULL))
+		if (unlikely(IS_ERR(to_page))) {
+			ret = PTR_ERR(to_page);
 			goto out_err;
-
+		}
 		preempt_disable();
 		from_virtual = kmap_atomic(from_page, KM_USER0);
 		to_virtual = kmap_atomic(to_page, KM_USER1);
@@ -595,5 +599,5 @@
 	if (!persistant_swap_storage)
 		fput(swap_storage);
 
-	return -ENOMEM;
+	return ret;
 }
diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig
index 0920492..61ab4da 100644
--- a/drivers/gpu/vga/Kconfig
+++ b/drivers/gpu/vga/Kconfig
@@ -16,3 +16,14 @@
 	help
 	  Reserves space in the kernel to maintain resource locking for
 	  multiple GPUS.  The overhead for each GPU is very small.
+
+config VGA_SWITCHEROO
+	bool "Laptop Hybrid Grapics - GPU switching support"
+	depends on X86
+	depends on ACPI
+	help
+	  Many laptops released in 2008/9/10 have two gpus with a multiplxer
+	  to switch between them. This adds support for dynamic switching when
+          X isn't running and delayed switching until the next logoff. This
+	  features is called hybrid graphics, ATI PowerXpress, and Nvidia
+	  HybridPower.
diff --git a/drivers/gpu/vga/Makefile b/drivers/gpu/vga/Makefile
index 7cc8c1e..14ca30b 100644
--- a/drivers/gpu/vga/Makefile
+++ b/drivers/gpu/vga/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_VGA_ARB)  += vgaarb.o
+obj-$(CONFIG_VGA_SWITCHEROO) += vga_switcheroo.o
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
new file mode 100644
index 0000000..d6d1149
--- /dev/null
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ *
+ * Licensed under GPLv2
+ *
+ * vga_switcheroo.c - Support for laptop with dual GPU using one set of outputs
+
+ Switcher interface - methods require for ATPX and DCM
+ - switchto - this throws the output MUX switch
+ - discrete_set_power - sets the power state for the discrete card
+
+ GPU driver interface
+ - set_gpu_state - this should do the equiv of s/r for the card
+		  - this should *not* set the discrete power state
+ - switch_check  - check if the device is in a position to switch now
+ */
+
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/fb.h>
+
+#include <linux/pci.h>
+#include <linux/vga_switcheroo.h>
+
+struct vga_switcheroo_client {
+	struct pci_dev *pdev;
+	struct fb_info *fb_info;
+	int pwr_state;
+	void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state);
+	bool (*can_switch)(struct pci_dev *pdev);
+	int id;
+	bool active;
+};
+
+static DEFINE_MUTEX(vgasr_mutex);
+
+struct vgasr_priv {
+
+	bool active;
+	bool delayed_switch_active;
+	enum vga_switcheroo_client_id delayed_client_id;
+
+	struct dentry *debugfs_root;
+	struct dentry *switch_file;
+
+	int registered_clients;
+	struct vga_switcheroo_client clients[VGA_SWITCHEROO_MAX_CLIENTS];
+
+	struct vga_switcheroo_handler *handler;
+};
+
+static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
+static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv);
+
+/* only one switcheroo per system */
+static struct vgasr_priv vgasr_priv;
+
+int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler)
+{
+	mutex_lock(&vgasr_mutex);
+	if (vgasr_priv.handler) {
+		mutex_unlock(&vgasr_mutex);
+		return -EINVAL;
+	}
+
+	vgasr_priv.handler = handler;
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(vga_switcheroo_register_handler);
+
+void vga_switcheroo_unregister_handler(void)
+{
+	mutex_lock(&vgasr_mutex);
+	vgasr_priv.handler = NULL;
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_unregister_handler);
+
+static void vga_switcheroo_enable(void)
+{
+	int i;
+	int ret;
+	/* call the handler to init */
+	vgasr_priv.handler->init();
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		ret = vgasr_priv.handler->get_client_id(vgasr_priv.clients[i].pdev);
+		if (ret < 0)
+			return;
+
+		vgasr_priv.clients[i].id = ret;
+	}
+	vga_switcheroo_debugfs_init(&vgasr_priv);
+	vgasr_priv.active = true;
+}
+
+int vga_switcheroo_register_client(struct pci_dev *pdev,
+				   void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state),
+				   bool (*can_switch)(struct pci_dev *pdev))
+{
+	int index;
+
+	mutex_lock(&vgasr_mutex);
+	/* don't do IGD vs DIS here */
+	if (vgasr_priv.registered_clients & 1)
+		index = 1;
+	else
+		index = 0;
+
+	vgasr_priv.clients[index].pwr_state = VGA_SWITCHEROO_ON;
+	vgasr_priv.clients[index].pdev = pdev;
+	vgasr_priv.clients[index].set_gpu_state = set_gpu_state;
+	vgasr_priv.clients[index].can_switch = can_switch;
+	vgasr_priv.clients[index].id = -1;
+	if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW)
+		vgasr_priv.clients[index].active = true;
+
+	vgasr_priv.registered_clients |= (1 << index);
+
+	/* if we get two clients + handler */
+	if (vgasr_priv.registered_clients == 0x3 && vgasr_priv.handler) {
+		printk(KERN_INFO "vga_switcheroo: enabled\n");
+		vga_switcheroo_enable();
+	}
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+EXPORT_SYMBOL(vga_switcheroo_register_client);
+
+void vga_switcheroo_unregister_client(struct pci_dev *pdev)
+{
+	int i;
+
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].pdev == pdev) {
+			vgasr_priv.registered_clients &= ~(1 << i);
+			break;
+		}
+	}
+
+	printk(KERN_INFO "vga_switcheroo: disabled\n");
+	vga_switcheroo_debugfs_fini(&vgasr_priv);
+	vgasr_priv.active = false;
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_unregister_client);
+
+void vga_switcheroo_client_fb_set(struct pci_dev *pdev,
+				 struct fb_info *info)
+{
+	int i;
+
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].pdev == pdev) {
+			vgasr_priv.clients[i].fb_info = info;
+			break;
+		}
+	}
+	mutex_unlock(&vgasr_mutex);
+}
+EXPORT_SYMBOL(vga_switcheroo_client_fb_set);
+
+static int vga_switcheroo_show(struct seq_file *m, void *v)
+{
+	int i;
+	mutex_lock(&vgasr_mutex);
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		seq_printf(m, "%d:%c:%s:%s\n", i,
+			   vgasr_priv.clients[i].active ? '+' : ' ',
+			   vgasr_priv.clients[i].pwr_state ? "Pwr" : "Off",
+			   pci_name(vgasr_priv.clients[i].pdev));
+	}
+	mutex_unlock(&vgasr_mutex);
+	return 0;
+}
+
+static int vga_switcheroo_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vga_switcheroo_show, NULL);
+}
+
+static int vga_switchon(struct vga_switcheroo_client *client)
+{
+	int ret;
+
+	ret = vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_ON);
+	/* call the driver callback to turn on device */
+	client->set_gpu_state(client->pdev, VGA_SWITCHEROO_ON);
+	client->pwr_state = VGA_SWITCHEROO_ON;
+	return 0;
+}
+
+static int vga_switchoff(struct vga_switcheroo_client *client)
+{
+	/* call the driver callback to turn off device */
+	client->set_gpu_state(client->pdev, VGA_SWITCHEROO_OFF);
+	vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_OFF);
+	client->pwr_state = VGA_SWITCHEROO_OFF;
+	return 0;
+}
+
+static int vga_switchto(struct vga_switcheroo_client *new_client)
+{
+	int ret;
+	int i;
+	struct vga_switcheroo_client *active = NULL;
+
+	if (new_client->active == true)
+		return 0;
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].active == true) {
+			active = &vgasr_priv.clients[i];
+			break;
+		}
+	}
+	if (!active)
+		return 0;
+
+	/* power up the first device */
+	ret = pci_enable_device(new_client->pdev);
+	if (ret)
+		return ret;
+
+	if (new_client->pwr_state == VGA_SWITCHEROO_OFF)
+		vga_switchon(new_client);
+
+	/* swap shadow resource to denote boot VGA device has changed so X starts on new device */
+	active->active = false;
+
+	active->pdev->resource[PCI_ROM_RESOURCE].flags &= ~IORESOURCE_ROM_SHADOW;
+	new_client->pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
+
+	if (new_client->fb_info) {
+		struct fb_event event;
+		event.info = new_client->fb_info;
+		fb_notifier_call_chain(FB_EVENT_REMAP_ALL_CONSOLE, &event);
+	}
+
+	ret = vgasr_priv.handler->switchto(new_client->id);
+	if (ret)
+		return ret;
+
+	if (active->pwr_state == VGA_SWITCHEROO_ON)
+		vga_switchoff(active);
+
+	new_client->active = true;
+	return 0;
+}
+
+static ssize_t
+vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos)
+{
+	char usercmd[64];
+	const char *pdev_name;
+	int i, ret;
+	bool delay = false, can_switch;
+	int client_id = -1;
+	struct vga_switcheroo_client *client = NULL;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(usercmd, ubuf, cnt))
+		return -EFAULT;
+
+	mutex_lock(&vgasr_mutex);
+
+	if (!vgasr_priv.active)
+		return -EINVAL;
+
+	/* pwr off the device not in use */
+	if (strncmp(usercmd, "OFF", 3) == 0) {
+		for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+			if (vgasr_priv.clients[i].active)
+				continue;
+			if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_ON)
+				vga_switchoff(&vgasr_priv.clients[i]);
+		}
+		goto out;
+	}
+	/* pwr on the device not in use */
+	if (strncmp(usercmd, "ON", 2) == 0) {
+		for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+			if (vgasr_priv.clients[i].active)
+				continue;
+			if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_OFF)
+				vga_switchon(&vgasr_priv.clients[i]);
+		}
+		goto out;
+	}
+
+	/* request a delayed switch - test can we switch now */
+	if (strncmp(usercmd, "DIGD", 4) == 0) {
+		client_id = VGA_SWITCHEROO_IGD;
+		delay = true;
+	}
+
+	if (strncmp(usercmd, "DDIS", 4) == 0) {
+		client_id = VGA_SWITCHEROO_DIS;
+		delay = true;
+	}
+
+	if (strncmp(usercmd, "IGD", 3) == 0)
+		client_id = VGA_SWITCHEROO_IGD;
+
+	if (strncmp(usercmd, "DIS", 3) == 0)
+		client_id = VGA_SWITCHEROO_DIS;
+
+	if (client_id == -1)
+		goto out;
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].id == client_id) {
+			client = &vgasr_priv.clients[i];
+			break;
+		}
+	}
+
+	vgasr_priv.delayed_switch_active = false;
+	/* okay we want a switch - test if devices are willing to switch */
+	can_switch = true;
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev);
+		if (can_switch == false) {
+			printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i);
+			break;
+		}
+	}
+
+	if (can_switch == false && delay == false)
+		goto out;
+
+	if (can_switch == true) {
+		pdev_name = pci_name(client->pdev);
+		ret = vga_switchto(client);
+		if (ret)
+			printk(KERN_ERR "vga_switcheroo: switching failed %d\n", ret);
+	} else {
+		printk(KERN_INFO "vga_switcheroo: setting delayed switch to client %d\n", client->id);
+		vgasr_priv.delayed_switch_active = true;
+		vgasr_priv.delayed_client_id = client_id;
+
+		/* we should at least power up the card to
+		   make the switch faster */
+		if (client->pwr_state == VGA_SWITCHEROO_OFF)
+			vga_switchon(client);
+	}
+
+out:
+	mutex_unlock(&vgasr_mutex);
+	return cnt;
+}
+
+static const struct file_operations vga_switcheroo_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = vga_switcheroo_debugfs_open,
+	.write = vga_switcheroo_debugfs_write,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv)
+{
+	if (priv->switch_file) {
+		debugfs_remove(priv->switch_file);
+		priv->switch_file = NULL;
+	}
+	if (priv->debugfs_root) {
+		debugfs_remove(priv->debugfs_root);
+		priv->debugfs_root = NULL;
+	}
+}
+
+static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv)
+{
+	/* already initialised */
+	if (priv->debugfs_root)
+		return 0;
+	priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL);
+
+	if (!priv->debugfs_root) {
+		printk(KERN_ERR "vga_switcheroo: Cannot create /sys/kernel/debug/vgaswitcheroo\n");
+		goto fail;
+	}
+
+	priv->switch_file = debugfs_create_file("switch", 0644,
+						priv->debugfs_root, NULL, &vga_switcheroo_debugfs_fops);
+	if (!priv->switch_file) {
+		printk(KERN_ERR "vga_switcheroo: cannot create /sys/kernel/debug/vgaswitcheroo/switch\n");
+		goto fail;
+	}
+	return 0;
+fail:
+	vga_switcheroo_debugfs_fini(priv);
+	return -1;
+}
+
+int vga_switcheroo_process_delayed_switch(void)
+{
+	struct vga_switcheroo_client *client = NULL;
+	const char *pdev_name;
+	bool can_switch = true;
+	int i;
+	int ret;
+	int err = -EINVAL;
+
+	mutex_lock(&vgasr_mutex);
+	if (!vgasr_priv.delayed_switch_active)
+		goto err;
+
+	printk(KERN_INFO "vga_switcheroo: processing delayed switch to %d\n", vgasr_priv.delayed_client_id);
+
+	for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) {
+		if (vgasr_priv.clients[i].id == vgasr_priv.delayed_client_id)
+			client = &vgasr_priv.clients[i];
+		can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev);
+		if (can_switch == false) {
+			printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i);
+			break;
+		}
+	}
+
+	if (can_switch == false || client == NULL)
+		goto err;
+
+	pdev_name = pci_name(client->pdev);
+	ret = vga_switchto(client);
+	if (ret)
+		printk(KERN_ERR "vga_switcheroo: delayed switching failed %d\n", ret);
+
+	vgasr_priv.delayed_switch_active = false;
+	err = 0;
+err:
+	mutex_unlock(&vgasr_mutex);
+	return err;
+}
+EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch);
+
diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 878f8ec..57d00ca 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -81,15 +81,15 @@
 	return chipset_table->ultra_settings;
 }
 
-static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
+static void aec6210_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u16 d_conf		= 0;
 	u8 ultra = 0, ultra_conf = 0;
 	u8 tmp0 = 0, tmp1 = 0, tmp2 = 0;
+	const u8 speed = drive->dma_mode;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -109,15 +109,15 @@
 	local_irq_restore(flags);
 }
 
-static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
+static void aec6260_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
 	u8 unit			= drive->dn & 1;
 	u8 tmp1 = 0, tmp2 = 0;
 	u8 ultra = 0, drive_conf = 0, ultra_conf = 0;
+	const u8 speed = drive->dma_mode;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -134,9 +134,10 @@
 	local_irq_restore(flags);
 }
 
-static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0);
+	drive->dma_mode = drive->pio_mode;
+	hwif->port_ops->set_dma_mode(hwif, drive);
 }
 
 static int init_chipset_aec62xx(struct pci_dev *dev)
diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c
index 90da1f9..25b9fe3 100644
--- a/drivers/ide/ali14xx.c
+++ b/drivers/ide/ali14xx.c
@@ -109,13 +109,14 @@
  * This function computes timing parameters
  * and sets controller registers accordingly.
  */
-static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ali14xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int driveNum;
 	int time1, time2;
 	u8 param1, param2, param3, param4;
 	unsigned long flags;
 	int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 
 	/* calculate timing, according to PIO mode */
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 0abc43f..2c8016a 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -8,7 +8,7 @@
  *  Copyright (C) 2002 Alan Cox
  *  ALi (now ULi M5228) support by Clear Zhang <Clear.Zhang@ali.com.tw>
  *  Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com>
- *  Copyright (C) 2007 Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
+ *  Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
  *
  *  (U)DMA capable version of ali 1533/1543(C), 1535(D)
  *
@@ -48,61 +48,84 @@
 static u8 chip_is_1543c_e;
 static struct pci_dev *isa_dev;
 
+static void ali_fifo_control(ide_hwif_t *hwif, ide_drive_t *drive, int on)
+{
+	struct pci_dev *pdev = to_pci_dev(hwif->dev);
+	int pio_fifo = 0x54 + hwif->channel;
+	u8 fifo;
+	int shift = 4 * (drive->dn & 1);
+
+	pci_read_config_byte(pdev, pio_fifo, &fifo);
+	fifo &= ~(0x0F << shift);
+	fifo |= (on << shift);
+	pci_write_config_byte(pdev, pio_fifo, fifo);
+}
+
+static void ali_program_timings(ide_hwif_t *hwif, ide_drive_t *drive,
+				struct ide_timing *t, u8 ultra)
+{
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
+	int port = hwif->channel ? 0x5c : 0x58;
+	int udmat = 0x56 + hwif->channel;
+	u8 unit = drive->dn & 1, udma;
+	int shift = 4 * unit;
+
+	/* Set up the UDMA */
+	pci_read_config_byte(dev, udmat, &udma);
+	udma &= ~(0x0F << shift);
+	udma |= ultra << shift;
+	pci_write_config_byte(dev, udmat, udma);
+
+	if (t == NULL)
+		return;
+
+	t->setup = clamp_val(t->setup, 1, 8) & 7;
+	t->act8b = clamp_val(t->act8b, 1, 8) & 7;
+	t->rec8b = clamp_val(t->rec8b, 1, 16) & 15;
+	t->active = clamp_val(t->active, 1, 8) & 7;
+	t->recover = clamp_val(t->recover, 1, 16) & 15;
+
+	pci_write_config_byte(dev, port, t->setup);
+	pci_write_config_byte(dev, port + 1, (t->act8b << 4) | t->rec8b);
+	pci_write_config_byte(dev, port + unit + 2,
+			      (t->active << 4) | t->recover);
+}
+
 /**
  *	ali_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Program the controller for the given PIO mode.
  */
 
-static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ali_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	int s_time = t->setup, a_time = t->active, c_time = t->cycle;
-	u8 s_clc, a_clc, r_clc;
-	unsigned long flags;
+	ide_drive_t *pair = ide_get_pair_dev(drive);
 	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-	int port = hwif->channel ? 0x5c : 0x58;
-	int portFIFO = hwif->channel ? 0x55 : 0x54;
-	u8 cd_dma_fifo = 0, unit = drive->dn & 1;
+	unsigned long T =  1000000 / bus_speed; /* PCI clock based */
+	struct ide_timing t;
 
-	if ((s_clc = (s_time * bus_speed + 999) / 1000) >= 8)
-		s_clc = 0;
-	if ((a_clc = (a_time * bus_speed + 999) / 1000) >= 8)
-		a_clc = 0;
+	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
+	if (pair) {
+		struct ide_timing p;
 
-	if (!(r_clc = (c_time * bus_speed + 999) / 1000 - a_clc - s_clc)) {
-		r_clc = 1;
-	} else {
-		if (r_clc >= 16)
-			r_clc = 0;
+		ide_timing_compute(pair, pair->pio_mode, &p, T, 1);
+		ide_timing_merge(&p, &t, &t,
+			IDE_TIMING_SETUP | IDE_TIMING_8BIT);
+		if (pair->dma_mode) {
+			ide_timing_compute(pair, pair->dma_mode, &p, T, 1);
+			ide_timing_merge(&p, &t, &t,
+				IDE_TIMING_SETUP | IDE_TIMING_8BIT);
+		}
 	}
-	local_irq_save(flags);
-	
+
 	/* 
 	 * PIO mode => ATA FIFO on, ATAPI FIFO off
 	 */
-	pci_read_config_byte(dev, portFIFO, &cd_dma_fifo);
-	if (drive->media==ide_disk) {
-		if (unit) {
-			pci_write_config_byte(dev, portFIFO, (cd_dma_fifo & 0x0F) | 0x50);
-		} else {
-			pci_write_config_byte(dev, portFIFO, (cd_dma_fifo & 0xF0) | 0x05);
-		}
-	} else {
-		if (unit) {
-			pci_write_config_byte(dev, portFIFO, cd_dma_fifo & 0x0F);
-		} else {
-			pci_write_config_byte(dev, portFIFO, cd_dma_fifo & 0xF0);
-		}
-	}
-	
-	pci_write_config_byte(dev, port, s_clc);
-	pci_write_config_byte(dev, port + unit + 2, (a_clc << 4) | r_clc);
-	local_irq_restore(flags);
+	ali_fifo_control(hwif, drive, (drive->media == ide_disk) ? 0x05 : 0x00);
+
+	ali_program_timings(hwif, drive, &t, 0);
 }
 
 /**
@@ -132,44 +155,42 @@
 
 /**
  *	ali_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Configure the hardware for the desired IDE transfer mode.
  */
 
-static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void ali_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
+	static u8 udma_timing[7] = { 0xC, 0xB, 0xA, 0x9, 0x8, 0xF, 0xD };
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 speed1		= speed;
-	u8 unit			= drive->dn & 1;
+	ide_drive_t *pair	= ide_get_pair_dev(drive);
+	int bus_speed		= ide_pci_clk ? ide_pci_clk : 33;
+	unsigned long T		=  1000000 / bus_speed; /* PCI clock based */
+	const u8 speed		= drive->dma_mode;
 	u8 tmpbyte		= 0x00;
-	int m5229_udma		= (hwif->channel) ? 0x57 : 0x56;
-
-	if (speed == XFER_UDMA_6)
-		speed1 = 0x47;
+	struct ide_timing t;
 
 	if (speed < XFER_UDMA_0) {
-		u8 ultra_enable	= (unit) ? 0x7f : 0xf7;
-		/*
-		 * clear "ultra enable" bit
-		 */
-		pci_read_config_byte(dev, m5229_udma, &tmpbyte);
-		tmpbyte &= ultra_enable;
-		pci_write_config_byte(dev, m5229_udma, tmpbyte);
+		ide_timing_compute(drive, drive->dma_mode, &t, T, 1);
+		if (pair) {
+			struct ide_timing p;
 
-		/*
-		 * FIXME: Oh, my... DMA timings are never set.
-		 */
+			ide_timing_compute(pair, pair->pio_mode, &p, T, 1);
+			ide_timing_merge(&p, &t, &t,
+				IDE_TIMING_SETUP | IDE_TIMING_8BIT);
+			if (pair->dma_mode) {
+				ide_timing_compute(pair, pair->dma_mode,
+						&p, T, 1);
+				ide_timing_merge(&p, &t, &t,
+					IDE_TIMING_SETUP | IDE_TIMING_8BIT);
+			}
+		}
+		ali_program_timings(hwif, drive, &t, 0);
 	} else {
-		pci_read_config_byte(dev, m5229_udma, &tmpbyte);
-		tmpbyte &= (0x0f << ((1-unit) << 2));
-		/*
-		 * enable ultra dma and set timing
-		 */
-		tmpbyte |= ((0x08 | ((4-speed1)&0x07)) << (unit << 2));
-		pci_write_config_byte(dev, m5229_udma, tmpbyte);
+		ali_program_timings(hwif, drive, NULL,
+				udma_timing[speed - XFER_UDMA_0]);
 		if (speed >= XFER_UDMA_3) {
 			pci_read_config_byte(dev, 0x4b, &tmpbyte);
 			tmpbyte |= 1;
@@ -355,19 +376,13 @@
  *
  *	This checks if the controller and the cable are capable
  *	of UDMA66 transfers. It doesn't check the drives.
- *	But see note 2 below!
- *
- *	FIXME: frobs bits that are not defined on newer ALi devicea
  */
 
 static u8 ali_cable_detect(ide_hwif_t *hwif)
 {
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long flags;
 	u8 cbl = ATA_CBL_PATA40, tmpbyte;
 
-	local_irq_save(flags);
-
 	if (m5229_revision >= 0xC2) {
 		/*
 		 * m5229 80-pin cable detection (from Host View)
@@ -387,8 +402,6 @@
 		}
 	}
 
-	local_irq_restore(flags);
-
 	return cbl;
 }
 
@@ -584,6 +597,6 @@
 module_init(ali15x3_ide_init);
 module_exit(ali15x3_ide_exit);
 
-MODULE_AUTHOR("Michael Aubry, Andrzej Krzysztofowicz, CJ, Andre Hedrick, Alan Cox");
+MODULE_AUTHOR("Michael Aubry, Andrzej Krzysztofowicz, CJ, Andre Hedrick, Alan Cox, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("PCI driver module for ALi 15x3 IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
index 628cd2e..3747b25 100644
--- a/drivers/ide/amd74xx.c
+++ b/drivers/ide/amd74xx.c
@@ -3,7 +3,7 @@
  * IDE driver for Linux.
  *
  * Copyright (c) 2000-2002 Vojtech Pavlik
- * Copyright (c) 2007-2008 Bartlomiej Zolnierkiewicz
+ * Copyright (c) 2007-2010 Bartlomiej Zolnierkiewicz
  *
  * Based on the work of:
  *      Andre Hedrick
@@ -70,7 +70,8 @@
 	default: return;
 	}
 
-	pci_write_config_byte(dev, AMD_UDMA_TIMING + offset + (3 - dn), t);
+	if (timing->udma)
+		pci_write_config_byte(dev, AMD_UDMA_TIMING + offset + 3 - dn, t);
 }
 
 /*
@@ -78,14 +79,14 @@
  * to a desired transfer mode.  It also can be called by upper layers.
  */
 
-static void amd_set_drive(ide_drive_t *drive, const u8 speed)
+static void amd_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	ide_drive_t *peer = ide_get_pair_dev(drive);
 	struct ide_timing t, p;
 	int T, UT;
 	u8 udma_mask = hwif->ultra_mask;
+	const u8 speed = drive->dma_mode;
 
 	T = 1000000000 / amd_clock;
 	UT = (udma_mask == ATA_UDMA2) ? T : (T / 2);
@@ -93,7 +94,7 @@
 	ide_timing_compute(drive, speed, &t, T, UT);
 
 	if (peer) {
-		ide_timing_compute(peer, peer->current_speed, &p, T, UT);
+		ide_timing_compute(peer, peer->pio_mode, &p, T, UT);
 		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
 	}
 
@@ -107,9 +108,10 @@
  * amd_set_pio_mode() is a callback from upper layers for PIO-only tuning.
  */
 
-static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	amd_set_drive(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	amd_set_drive(hwif, drive);
 }
 
 static void amd7409_cable_detect(struct pci_dev *dev)
@@ -340,6 +342,6 @@
 module_init(amd74xx_ide_init);
 module_exit(amd74xx_ide_exit);
 
-MODULE_AUTHOR("Vojtech Pavlik");
+MODULE_AUTHOR("Vojtech Pavlik, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("AMD PCI IDE driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c
index 248219a..000a78e 100644
--- a/drivers/ide/at91_ide.c
+++ b/drivers/ide/at91_ide.c
@@ -172,11 +172,12 @@
 	leave_16bit(chipselect, mode);
 }
 
-static void at91_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void at91_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	struct ide_timing *timing;
-	u8 chipselect = drive->hwif->select_data;
+	u8 chipselect = hwif->select_data;
 	int use_iordy = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	pdbg("chipselect %u pio %u\n", chipselect, pio);
 
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
index 837322b..15f0ead 100644
--- a/drivers/ide/atiixp.c
+++ b/drivers/ide/atiixp.c
@@ -42,19 +42,20 @@
 
 /**
  *	atiixp_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode.
  */
 
-static void atiixp_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long flags;
 	int timing_shift = (drive->dn ^ 1) * 8;
 	u32 pio_timing_data;
 	u16 pio_mode_data;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	spin_lock_irqsave(&atiixp_lock, flags);
 
@@ -74,21 +75,22 @@
 
 /**
  *	atiixp_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Set a ATIIXP host controller to the desired DMA mode.  This involves
  *	programming the right timing data into the PCI configuration space.
  */
 
-static void atiixp_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void atiixp_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned long flags;
 	int timing_shift = (drive->dn ^ 1) * 8;
 	u32 tmp32;
 	u16 tmp16;
 	u16 udma_ctl = 0;
+	const u8 speed = drive->dma_mode;
 
 	spin_lock_irqsave(&atiixp_lock, flags);
 
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 349a67b..b26c234 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -99,12 +99,11 @@
 }
 #endif
 
-static void au1xxx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2);
 
-	/* set pio mode! */
-	switch(pio) {
+	switch (drive->pio_mode - XFER_PIO_0) {
 	case 0:
 		mem_sttime = SBC_IDE_TIMING(PIO0);
 
@@ -161,11 +160,11 @@
 	au_writel(mem_stcfg,MEM_STCFG2);
 }
 
-static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void auide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2);
 
-	switch(speed) {
+	switch (drive->dma_mode) {
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 	case XFER_MW_DMA_2:
 		mem_sttime = SBC_IDE_TIMING(MDMA2);
@@ -297,8 +296,8 @@
 	 */
 	drive->waiting_for_dma++;
 	if (drive->waiting_for_dma >= DMA_WAIT_TIMEOUT) {
-		printk(KERN_WARNING "%s: timeout waiting for ddma to \
-                                     complete\n", drive->name);
+		printk(KERN_WARNING "%s: timeout waiting for ddma to complete\n",
+		       drive->name);
 		return 1;
 	}
 	udelay(10);
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index 1a32d62..d2b8b27 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -572,9 +572,10 @@
 	program_drive_counts(drive, index);
 }
 
-static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cmd640_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned int index = 0, cycle_time;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 b;
 
 	switch (pio) {
@@ -605,7 +606,7 @@
 }
 #endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
 
-static void cmd640_init_dev(ide_drive_t *drive)
+static void __init cmd640_init_dev(ide_drive_t *drive)
 {
 	unsigned int i = drive->hwif->channel * 2 + (drive->dn & 1);
 
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index f2500c8..5f80312 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -7,6 +7,7 @@
  * Copyright (C) 1998		David S. Miller (davem@redhat.com)
  *
  * Copyright (C) 1999-2002	Andre Hedrick <andre@linux-ide.org>
+ * Copyright (C) 2007-2010	Bartlomiej Zolnierkiewicz
  * Copyright (C) 2007,2009	MontaVista Software, Inc. <source@mvista.com>
  */
 
@@ -50,72 +51,42 @@
 #define UDIDETCR1	0x7B
 #define DTPR1		0x7C
 
-static u8 quantize_timing(int timing, int quant)
+static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
 {
-	return (timing + quant - 1) / quant;
-}
-
-/*
- * This routine calculates active/recovery counts and then writes them into
- * the chipset registers.
- */
-static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_time)
-{
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	int clock_time = 1000 / (ide_pci_clk ? ide_pci_clk : 33);
-	u8  cycle_count, active_count, recovery_count, drwtim;
+	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
+	const unsigned long T = 1000000 / bus_speed;
 	static const u8 recovery_values[] =
 		{15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
+	static const u8 setup_values[] = {0x40, 0x40, 0x40, 0x80, 0, 0xc0};
+	static const u8 arttim_regs[4] = {ARTTIM0, ARTTIM1, ARTTIM23, ARTTIM23};
 	static const u8 drwtim_regs[4] = {DRWTIM0, DRWTIM1, DRWTIM2, DRWTIM3};
+	struct ide_timing t;
+	u8 arttim = 0;
 
-	cycle_count	= quantize_timing( cycle_time, clock_time);
-	active_count	= quantize_timing(active_time, clock_time);
-	recovery_count	= cycle_count - active_count;
+	ide_timing_compute(drive, mode, &t, T, 0);
 
 	/*
 	 * In case we've got too long recovery phase, try to lengthen
 	 * the active phase
 	 */
-	if (recovery_count > 16) {
-		active_count += recovery_count - 16;
-		recovery_count = 16;
+	if (t.recover > 16) {
+		t.active += t.recover - 16;
+		t.recover = 16;
 	}
-	if (active_count > 16)		/* shouldn't actually happen... */
-	 	active_count = 16;
+	if (t.active > 16)		/* shouldn't actually happen... */
+		t.active = 16;
 
 	/*
 	 * Convert values to internal chipset representation
 	 */
-	recovery_count = recovery_values[recovery_count];
- 	active_count  &= 0x0f;
+	t.recover = recovery_values[t.recover];
+	t.active &= 0x0f;
 
 	/* Program the active/recovery counts into the DRWTIM register */
-	drwtim = (active_count << 4) | recovery_count;
-	(void) pci_write_config_byte(dev, drwtim_regs[drive->dn], drwtim);
-}
-
-/*
- * This routine writes into the chipset registers
- * PIO setup/active/recovery timings.
- */
-static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct ide_timing *t	= ide_timing_find_mode(XFER_PIO_0 + pio);
-	unsigned long setup_count;
-	unsigned int cycle_time;
-	u8 arttim = 0;
-
-	static const u8 setup_values[] = {0x40, 0x40, 0x40, 0x80, 0, 0xc0};
-	static const u8 arttim_regs[4] = {ARTTIM0, ARTTIM1, ARTTIM23, ARTTIM23};
-
-	cycle_time = ide_pio_cycle_time(drive, pio);
-
-	program_cycle_times(drive, cycle_time, t->active);
-
-	setup_count = quantize_timing(t->setup,
-			1000 / (ide_pci_clk ? ide_pci_clk : 33));
+	pci_write_config_byte(dev, drwtim_regs[drive->dn],
+			      (t.active << 4) | t.recover);
 
 	/*
 	 * The primary channel has individual address setup timing registers
@@ -126,15 +97,21 @@
 	if (hwif->channel) {
 		ide_drive_t *pair = ide_get_pair_dev(drive);
 
-		ide_set_drivedata(drive, (void *)setup_count);
+		if (pair) {
+			struct ide_timing tp;
 
-		if (pair)
-			setup_count = max_t(u8, setup_count,
-					(unsigned long)ide_get_drivedata(pair));
+			ide_timing_compute(pair, pair->pio_mode, &tp, T, 0);
+			ide_timing_merge(&t, &tp, &t, IDE_TIMING_SETUP);
+			if (pair->dma_mode) {
+				ide_timing_compute(pair, pair->dma_mode,
+						&tp, T, 0);
+				ide_timing_merge(&tp, &t, &t, IDE_TIMING_SETUP);
+			}
+		}
 	}
 
-	if (setup_count > 5)		/* shouldn't actually happen... */
-		setup_count = 5;
+	if (t.setup > 5)		/* shouldn't actually happen... */
+		t.setup = 5;
 
 	/*
 	 * Program the address setup clocks into the ARTTIM registers.
@@ -144,7 +121,7 @@
 	if (hwif->channel)
 		arttim &= ~ARTTIM23_INTR_CH1;
 	arttim &= ~0xc0;
-	arttim |= setup_values[setup_count];
+	arttim |= setup_values[t.setup];
 	(void) pci_write_config_byte(dev, arttim_regs[drive->dn], arttim);
 }
 
@@ -153,8 +130,10 @@
  * Special cases are 8: prefetch off, 9: prefetch on (both never worked)
  */
 
-static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
+
 	/*
 	 * Filter out the prefetch control values
 	 * to prevent PIO5 from being programmed
@@ -162,20 +141,18 @@
 	if (pio == 8 || pio == 9)
 		return;
 
-	cmd64x_tune_pio(drive, pio);
+	cmd64x_program_timings(drive, XFER_PIO_0 + pio);
 }
 
-static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cmd64x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 0x01;
 	u8 regU = 0, pciU	= hwif->channel ? UDIDETCR1 : UDIDETCR0;
+	const u8 speed		= drive->dma_mode;
 
-	if (speed >= XFER_SW_DMA_0) {
-		(void) pci_read_config_byte(dev, pciU, &regU);
-		regU &= ~(unit ? 0xCA : 0x35);
-	}
+	pci_read_config_byte(dev, pciU, &regU);
+	regU &= ~(unit ? 0xCA : 0x35);
 
 	switch(speed) {
 	case XFER_UDMA_5:
@@ -197,18 +174,13 @@
 		regU |= unit ? 0xC2 : 0x31;
 		break;
 	case XFER_MW_DMA_2:
-		program_cycle_times(drive, 120, 70);
-		break;
 	case XFER_MW_DMA_1:
-		program_cycle_times(drive, 150, 80);
-		break;
 	case XFER_MW_DMA_0:
-		program_cycle_times(drive, 480, 215);
+		cmd64x_program_timings(drive, speed);
 		break;
 	}
 
-	if (speed >= XFER_SW_DMA_0)
-		(void) pci_write_config_byte(dev, pciU, regU);
+	pci_write_config_byte(dev, pciU, regU);
 }
 
 static void cmd648_clear_irq(ide_drive_t *drive)
@@ -471,6 +443,6 @@
 module_init(cmd64x_ide_init);
 module_exit(cmd64x_ide_exit);
 
-MODULE_AUTHOR("Eddie Dost, David Miller, Andre Hedrick");
+MODULE_AUTHOR("Eddie Dost, David Miller, Andre Hedrick, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("PCI driver module for CMD64x IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 09f98ed..2c1e5f7 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -57,11 +57,11 @@
 	{1, 2, 1}
 };
 
-static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int controller = drive->dn > 1 ? 1 : 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/* 8bit CAT/CRT - 8bit command timing for channel */
 	pci_write_config_byte(pdev, 0x62 + controller, 
@@ -81,11 +81,12 @@
 		(cs5520_pio_clocks[pio].assert));
 }
 
-static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	printk(KERN_ERR "cs55x0: bad ide timing.\n");
 
-	cs5520_set_pio_mode(drive, 0);
+	drive->pio_mode = XFER_PIO_0 + 0;
+	cs5520_set_pio_mode(hwif, drive);
 }
 
 static const struct ide_port_ops cs5520_port_ops = {
diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c
index 40bf05e..4dc4eb9 100644
--- a/drivers/ide/cs5530.c
+++ b/drivers/ide/cs5530.c
@@ -41,8 +41,8 @@
 
 /**
  *	cs5530_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Handles setting of PIO mode for the chipset.
  *
@@ -50,10 +50,11 @@
  *	will have valid default PIO timings set up before we get here.
  */
 
-static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5530_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	unsigned long basereg = CS5530_BASEREG(drive->hwif);
+	unsigned long basereg = CS5530_BASEREG(hwif);
 	unsigned int format = (inl(basereg + 4) >> 31) & 1;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	outl(cs5530_pio_timings[format][pio], basereg + ((drive->dn & 1)<<3));
 }
@@ -99,12 +100,12 @@
 	return mask;
 }
 
-static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cs5530_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long basereg;
 	unsigned int reg, timings = 0;
 
-	switch (mode) {
+	switch (drive->dma_mode) {
 		case XFER_UDMA_0:	timings = 0x00921250; break;
 		case XFER_UDMA_1:	timings = 0x00911140; break;
 		case XFER_UDMA_2:	timings = 0x00911030; break;
@@ -112,7 +113,7 @@
 		case XFER_MW_DMA_1:	timings = 0x00012121; break;
 		case XFER_MW_DMA_2:	timings = 0x00002020; break;
 	}
-	basereg = CS5530_BASEREG(drive->hwif);
+	basereg = CS5530_BASEREG(hwif);
 	reg = inl(basereg + 4);			/* get drive0 config register */
 	timings |= reg & 0x80000000;		/* preserve PIO format bit */
 	if ((drive-> dn & 1) == 0) {		/* are we configuring drive0? */
diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c
index b883838..5059faf 100644
--- a/drivers/ide/cs5535.c
+++ b/drivers/ide/cs5535.c
@@ -86,7 +86,7 @@
 		cmd = pioa = speed - XFER_PIO_0;
 
 		if (pair) {
-			u8 piob = ide_get_best_pio_mode(pair, 255, 4);
+			u8 piob = pair->pio_mode - XFER_PIO_0;
 
 			if (piob < cmd)
 				cmd = piob;
@@ -129,28 +129,28 @@
 
 /**
  *	cs5535_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Programs the chipset for DMA mode.
  */
 
-static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void cs5535_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	cs5535_set_speed(drive, speed);
+	cs5535_set_speed(drive, drive->dma_mode);
 }
 
 /**
  *	cs5535_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	A callback from the upper layers for PIO-only tuning.
  */
 
-static void cs5535_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5535_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	cs5535_set_speed(drive, XFER_PIO_0 + pio);
+	cs5535_set_speed(drive, drive->pio_mode);
 }
 
 static u8 cs5535_cable_detect(ide_hwif_t *hwif)
diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c
index 9623b85..24214ab 100644
--- a/drivers/ide/cs5536.c
+++ b/drivers/ide/cs5536.c
@@ -125,11 +125,11 @@
 
 /**
  *	cs5536_set_pio_mode		-	PIO timing setup
+ *	@hwif: ATA port
  *	@drive: ATA device
- *	@pio: PIO mode number
  */
 
-static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 drv_timings[5] = {
 		0x98, 0x55, 0x32, 0x21, 0x20,
@@ -143,15 +143,16 @@
 		0x99, 0x92, 0x90, 0x22, 0x20,
 	};
 
-	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 cast;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 cmd_pio = pio;
 
 	if (pair)
-		cmd_pio = min(pio, ide_get_best_pio_mode(pair, 255, 4));
+		cmd_pio = min_t(u8, pio, pair->pio_mode - XFER_PIO_0);
 
 	timings &= (IDE_DRV_MASK << 8);
 	timings |= drv_timings[pio];
@@ -172,11 +173,11 @@
 
 /**
  *	cs5536_set_dma_mode		-	DMA timing setup
+ *	@hwif: ATA port
  *	@drive: ATA device
- *	@mode: DMA mode
  */
 
-static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cs5536_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 udma_timings[6] = {
 		0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6,
@@ -186,10 +187,11 @@
 		0x67, 0x21, 0x20,
 	};
 
-	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT;
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u32 etc;
+	const u8 mode = drive->dma_mode;
 
 	cs5536_read(pdev, ETC, &etc);
 
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index d6e2cbb..9383f67 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -1,43 +1,11 @@
 /*
  *  Copyright (C) 1998-2000 Andreas S. Krebs (akrebs@altavista.net), Maintainer
  *  Copyright (C) 1998-2002 Andre Hedrick <andre@linux-ide.org>, Integrator
+ *  Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
  *
  * CYPRESS CY82C693 chipset IDE controller
  *
  * The CY82C693 chipset is used on Digital's PC-Alpha 164SX boards.
- * Writing the driver was quite simple, since most of the job is
- * done by the generic pci-ide support.
- * The hard part was finding the CY82C693's datasheet on Cypress's
- * web page :-(. But Altavista solved this problem :-).
- *
- *
- * Notes:
- * - I recently got a 16.8G IBM DTTA, so I was able to test it with
- *   a large and fast disk - the results look great, so I'd say the
- *   driver is working fine :-)
- *   hdparm -t reports 8.17 MB/sec at about 6% CPU usage for the DTTA
- * - this is my first linux driver, so there's probably a lot  of room
- *   for optimizations and bug fixing, so feel free to do it.
- * - if using PIO mode it's a good idea to set the PIO mode and
- *   32-bit I/O support (if possible), e.g. hdparm -p2 -c1 /dev/hda
- * - I had some problems with my IBM DHEA with PIO modes < 2
- *   (lost interrupts) ?????
- * - first tests with DMA look okay, they seem to work, but there is a
- *   problem with sound - the BusMaster IDE TimeOut should fixed this
- *
- * Ancient History:
- * AMH@1999-08-24: v0.34 init_cy82c693_chip moved to pci_init_cy82c693
- * ASK@1999-01-23: v0.33 made a few minor code clean ups
- *                       removed DMA clock speed setting by default
- *                       added boot message
- * ASK@1998-11-01: v0.32 added support to set BusMaster IDE TimeOut
- *                       added support to set DMA Controller Clock Speed
- * ASK@1998-10-31: v0.31 fixed problem with setting to high DMA modes
- *                       on some drives.
- * ASK@1998-10-29: v0.3 added support to set DMA modes
- * ASK@1998-10-28: v0.2 added support to set PIO modes
- * ASK@1998-10-27: v0.1 first version - chipset detection
- *
  */
 
 #include <linux/module.h>
@@ -81,87 +49,13 @@
 #define CY82_INDEX_CHANNEL1	0x31
 #define CY82_INDEX_TIMEOUT	0x32
 
-/* the min and max PCI bus speed in MHz - from datasheet */
-#define CY82C963_MIN_BUS_SPEED	25
-#define CY82C963_MAX_BUS_SPEED	33
-
-/* the struct for the PIO mode timings */
-typedef struct pio_clocks_s {
-	u8	address_time;	/* Address setup (clocks) */
-	u8	time_16r;	/* clocks for 16bit IOR (0xF0=Active/data, 0x0F=Recovery) */
-	u8	time_16w;	/* clocks for 16bit IOW (0xF0=Active/data, 0x0F=Recovery) */
-	u8	time_8;		/* clocks for 8bit (0xF0=Active/data, 0x0F=Recovery) */
-} pio_clocks_t;
-
-/*
- * calc clocks using bus_speed
- * returns (rounded up) time in bus clocks for time in ns
- */
-static int calc_clk(int time, int bus_speed)
-{
-	int clocks;
-
-	clocks = (time*bus_speed+999)/1000 - 1;
-
-	if (clocks < 0)
-		clocks = 0;
-
-	if (clocks > 0x0F)
-		clocks = 0x0F;
-
-	return clocks;
-}
-
-/*
- * compute the values for the clock registers for PIO
- * mode and pci_clk [MHz] speed
- *
- * NOTE: for mode 0,1 and 2 drives 8-bit IDE command control registers are used
- *       for mode 3 and 4 drives 8 and 16-bit timings are the same
- *
- */
-static void compute_clocks(u8 pio, pio_clocks_t *p_pclk)
-{
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	int clk1, clk2;
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-
-	/* we don't check against CY82C693's min and max speed,
-	 * so you can play with the idebus=xx parameter
-	 */
-
-	/* let's calc the address setup time clocks */
-	p_pclk->address_time = (u8)calc_clk(t->setup, bus_speed);
-
-	/* let's calc the active and recovery time clocks */
-	clk1 = calc_clk(t->active, bus_speed);
-
-	/* calc recovery timing */
-	clk2 = t->cycle - t->active - t->setup;
-
-	clk2 = calc_clk(clk2, bus_speed);
-
-	clk1 = (clk1<<4)|clk2;	/* combine active and recovery clocks */
-
-	/* note: we use the same values for 16bit IOR and IOW
-	 *	those are all the same, since I don't have other
-	 *	timings than those from ide-lib.c
-	 */
-
-	p_pclk->time_16r = (u8)clk1;
-	p_pclk->time_16w = (u8)clk1;
-
-	/* what are good values for 8bit ?? */
-	p_pclk->time_8 = (u8)clk1;
-}
-
 /*
  * set DMA mode a specific channel for CY82C693
  */
 
-static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void cy82c693_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
+	const u8 mode = drive->dma_mode;
 	u8 single = (mode & 0x10) >> 4, index = 0, data = 0;
 
 	index = hwif->channel ? CY82_INDEX_CHANNEL1 : CY82_INDEX_CHANNEL0;
@@ -186,12 +80,14 @@
 	outb(data, CY82_DATA_PORT);
 }
 
-static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	pio_clocks_t pclk;
+	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
+	const unsigned long T = 1000000 / bus_speed;
 	unsigned int addrCtrl;
+	struct ide_timing t;
+	u8 time_16, time_8;
 
 	/* select primary or secondary channel */
 	if (hwif->index > 0) {  /* drive is on the secondary channel */
@@ -204,8 +100,12 @@
 		}
 	}
 
-	/* let's calc the values for this PIO mode */
-	compute_clocks(pio, &pclk);
+	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
+
+	time_16 = clamp_val(t.recover - 1, 0, 15) |
+		  (clamp_val(t.active - 1, 0, 15) << 4);
+	time_8 = clamp_val(t.act8b - 1, 0, 15) |
+		 (clamp_val(t.rec8b - 1, 0, 15) << 4);
 
 	/* now let's write  the clocks registers */
 	if ((drive->dn & 1) == 0) {
@@ -217,13 +117,13 @@
 		pci_read_config_dword(dev, CY82_IDE_ADDRSETUP, &addrCtrl);
 
 		addrCtrl &= (~0xF);
-		addrCtrl |= (unsigned int)pclk.address_time;
+		addrCtrl |= clamp_val(t.setup - 1, 0, 15);
 		pci_write_config_dword(dev, CY82_IDE_ADDRSETUP, addrCtrl);
 
 		/* now let's set the remaining registers */
-		pci_write_config_byte(dev, CY82_IDE_MASTER_IOR, pclk.time_16r);
-		pci_write_config_byte(dev, CY82_IDE_MASTER_IOW, pclk.time_16w);
-		pci_write_config_byte(dev, CY82_IDE_MASTER_8BIT, pclk.time_8);
+		pci_write_config_byte(dev, CY82_IDE_MASTER_IOR, time_16);
+		pci_write_config_byte(dev, CY82_IDE_MASTER_IOW, time_16);
+		pci_write_config_byte(dev, CY82_IDE_MASTER_8BIT, time_8);
 	} else {
 		/*
 		 * set slave drive
@@ -233,13 +133,13 @@
 		pci_read_config_dword(dev, CY82_IDE_ADDRSETUP, &addrCtrl);
 
 		addrCtrl &= (~0xF0);
-		addrCtrl |= ((unsigned int)pclk.address_time<<4);
+		addrCtrl |= (clamp_val(t.setup - 1, 0, 15) << 4);
 		pci_write_config_dword(dev, CY82_IDE_ADDRSETUP, addrCtrl);
 
 		/* now let's set the remaining registers */
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOR, pclk.time_16r);
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, pclk.time_16w);
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, pclk.time_8);
+		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOR, time_16);
+		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, time_16);
+		pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, time_8);
 	}
 }
 
@@ -325,6 +225,6 @@
 module_init(cy82c693_ide_init);
 module_exit(cy82c693_ide_exit);
 
-MODULE_AUTHOR("Andreas Krebs, Andre Hedrick");
+MODULE_AUTHOR("Andreas Krebs, Andre Hedrick, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("PCI driver module for the Cypress CY82C693 IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c
index c6b1381..6929f7f 100644
--- a/drivers/ide/dtc2278.c
+++ b/drivers/ide/dtc2278.c
@@ -68,11 +68,11 @@
 
 static DEFINE_SPINLOCK(dtc2278_lock);
 
-static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void dtc2278_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long flags;
 
-	if (pio >= 3) {
+	if (drive->pio_mode >= XFER_PIO_3) {
 		spin_lock_irqsave(&dtc2278_lock, flags);
 		/*
 		 * This enables PIO mode4 (3?) on the first interface
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 4d90ac2..b885c1d 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -627,14 +627,14 @@
 	return info->timings->clock_table[info->clock][i];
 }
 
-static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed)
+static void hpt3xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	struct hpt_timings *t	= info->timings;
 	u8  itr_addr		= 0x40 + (drive->dn * 4);
 	u32 old_itr		= 0;
+	const u8 speed		= drive->dma_mode;
 	u32 new_itr		= get_speed_setting(speed, info);
 	u32 itr_mask		= speed < XFER_MW_DMA_0 ? t->pio_mask :
 				 (speed < XFER_UDMA_0   ? t->dma_mask :
@@ -651,9 +651,10 @@
 	pci_write_config_dword(dev, itr_addr, new_itr);
 }
 
-static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	hpt3xx_set_mode(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	hpt3xx_set_mode(hwif, drive);
 }
 
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
index aafed80..d81e496 100644
--- a/drivers/ide/ht6560b.c
+++ b/drivers/ide/ht6560b.c
@@ -279,9 +279,10 @@
 #endif
 }
 
-static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void ht6560b_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long flags, config;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 timing;
 	
 	switch (pio) {
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 0f67f1a..4a697a2 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -65,6 +65,8 @@
 };
 
 struct icside_state {
+	unsigned int channel;
+	unsigned int enabled;
 	void __iomem *irq_port;
 	void __iomem *ioc_base;
 	unsigned int sel;
@@ -114,11 +116,18 @@
 	struct icside_state *state = ec->irq_data;
 	void __iomem *base = state->irq_port;
 
-	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
-	readb(base + ICS_ARCIN_V6_INTROFFSET_2);
+	state->enabled = 1;
 
-	writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
-	readb(base + ICS_ARCIN_V6_INTROFFSET_1);
+	switch (state->channel) {
+	case 0:
+		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
+		readb(base + ICS_ARCIN_V6_INTROFFSET_2);
+		break;
+	case 1:
+		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
+		readb(base + ICS_ARCIN_V6_INTROFFSET_1);
+		break;
+	}
 }
 
 /* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
@@ -128,6 +137,8 @@
 {
 	struct icside_state *state = ec->irq_data;
 
+	state->enabled = 0;
+
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
 	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
 }
@@ -149,6 +160,44 @@
 	.irqpending	= icside_irqpending_arcin_v6,
 };
 
+/*
+ * Handle routing of interrupts.  This is called before
+ * we write the command to the drive.
+ */
+static void icside_maskproc(ide_drive_t *drive, int mask)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct expansion_card *ec = ECARD_DEV(hwif->dev);
+	struct icside_state *state = ecard_get_drvdata(ec);
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	state->channel = hwif->channel;
+
+	if (state->enabled && !mask) {
+		switch (hwif->channel) {
+		case 0:
+			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+			break;
+		case 1:
+			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+			break;
+		}
+	} else {
+		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
+		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
+	}
+
+	local_irq_restore(flags);
+}
+
+static const struct ide_port_ops icside_v6_no_dma_port_ops = {
+	.maskproc		= icside_maskproc,
+};
+
 #ifdef CONFIG_BLK_DEV_IDEDMA_ICS
 /*
  * SG-DMA support.
@@ -185,10 +234,11 @@
  *	MW1	80	50	50	150	C
  *	MW2	70	25	25	120	C
  */
-static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode)
+static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned long cycle_time;
 	int use_dma_info = 0;
+	const u8 xfer_mode = drive->dma_mode;
 
 	switch (xfer_mode) {
 	case XFER_MW_DMA_2:
@@ -228,6 +278,7 @@
 
 static const struct ide_port_ops icside_v6_port_ops = {
 	.set_dma_mode		= icside_set_dma_mode,
+	.maskproc		= icside_maskproc,
 };
 
 static void icside_dma_host_set(ide_drive_t *drive, int on)
@@ -272,6 +323,11 @@
 	BUG_ON(dma_channel_active(ec->dma));
 
 	/*
+	 * Ensure that we have the right interrupt routed.
+	 */
+	icside_maskproc(drive, 0);
+
+	/*
 	 * Route the DMA signals to the correct interface.
 	 */
 	writeb(state->sel | hwif->channel, state->ioc_base);
@@ -399,6 +455,7 @@
 
 static const struct ide_port_info icside_v6_port_info __initdata = {
 	.init_dma		= icside_dma_off_init,
+	.port_ops		= &icside_v6_no_dma_port_ops,
 	.dma_ops		= &icside_v6_dma_ops,
 	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
 	.mwdma_mask		= ATA_MWDMA2,
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
index dd63963..ab87e4f 100644
--- a/drivers/ide/ide-cs.c
+++ b/drivers/ide/ide-cs.c
@@ -121,19 +121,11 @@
 static void ide_detach(struct pcmcia_device *link)
 {
     ide_info_t *info = link->priv;
-    ide_hwif_t *hwif = info->host->ports[0];
-    unsigned long data_addr, ctl_addr;
 
     dev_dbg(&link->dev, "ide_detach(0x%p)\n", link);
 
-    data_addr = hwif->io_ports.data_addr;
-    ctl_addr  = hwif->io_ports.ctl_addr;
-
     ide_release(link);
 
-    release_region(ctl_addr, 1);
-    release_region(data_addr, 8);
-
     kfree(info);
 } /* ide_detach */
 
@@ -354,12 +346,19 @@
 
     dev_dbg(&link->dev, "ide_release(0x%p)\n", link);
 
-    if (info->ndev)
-	/* FIXME: if this fails we need to queue the cleanup somehow
-	   -- need to investigate the required PCMCIA magic */
-	ide_host_remove(host);
+    if (info->ndev) {
+	ide_hwif_t *hwif = host->ports[0];
+	unsigned long data_addr, ctl_addr;
 
-    info->ndev = 0;
+	data_addr = hwif->io_ports.data_addr;
+	ctl_addr = hwif->io_ports.ctl_addr;
+
+	ide_host_remove(host);
+	info->ndev = 0;
+
+	release_region(ctl_addr, 1);
+	release_region(data_addr, 8);
+    }
 
     pcmcia_disable_device(link);
 } /* ide_release */
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 1099bf7..c6935c7 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -105,15 +105,17 @@
 		return -ENOSYS;
 
 	if (set_pio_mode_abuse(drive->hwif, arg)) {
+		drive->pio_mode = arg + XFER_PIO_0;
+
 		if (arg == 8 || arg == 9) {
 			unsigned long flags;
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
 			spin_lock_irqsave(&hwif->lock, flags);
-			port_ops->set_pio_mode(drive, arg);
+			port_ops->set_pio_mode(hwif, drive);
 			spin_unlock_irqrestore(&hwif->lock, flags);
 		} else
-			port_ops->set_pio_mode(drive, arg);
+			port_ops->set_pio_mode(hwif, drive);
 	} else {
 		int keep_dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 222c1ef..376f2dc 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -231,7 +231,7 @@
 	u16 *id = drive->id;
 	int ivb = ide_in_drive_list(id, ivb_list);
 
-	if (hwif->cbl == ATA_CBL_PATA40_SHORT)
+	if (hwif->cbl == ATA_CBL_SATA || hwif->cbl == ATA_CBL_PATA40_SHORT)
 		return 1;
 
 	if (ivb)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f8c1ae6..fbedd35 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1042,6 +1042,8 @@
 		if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS)
 			drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
 
+		drive->pio_mode = XFER_PIO_0;
+
 		if (port_ops && port_ops->init_dev)
 			port_ops->init_dev(drive);
 	}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 6a0e625..b072328 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1365,7 +1365,7 @@
  * supported here, and not in the corresponding block interface. Our own
  * ide-tape ioctls are supported on both interfaces.
  */
-static int idetape_chrdev_ioctl(struct inode *inode, struct file *file,
+static long do_idetape_chrdev_ioctl(struct file *file,
 				unsigned int cmd, unsigned long arg)
 {
 	struct ide_tape_obj *tape = file->private_data;
@@ -1420,6 +1420,16 @@
 	}
 }
 
+static long idetape_chrdev_ioctl(struct file *file,
+				unsigned int cmd, unsigned long arg)
+{
+	long ret;
+	lock_kernel();
+	ret = do_idetape_chrdev_ioctl(file, cmd, arg);
+	unlock_kernel();
+	return ret;
+}
+
 /*
  * Do a mode sense page 0 with block descriptor and if it succeeds set the tape
  * block size with the reported value.
@@ -1888,7 +1898,7 @@
 	.owner		= THIS_MODULE,
 	.read		= idetape_chrdev_read,
 	.write		= idetape_chrdev_write,
-	.ioctl		= idetape_chrdev_ioctl,
+	.unlocked_ioctl	= idetape_chrdev_ioctl,
 	.open		= idetape_chrdev_open,
 	.release	= idetape_chrdev_release,
 };
diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c
index 001a563..0e05f75 100644
--- a/drivers/ide/ide-timings.c
+++ b/drivers/ide/ide-timings.c
@@ -166,12 +166,13 @@
 	if (id[ATA_ID_FIELD_VALID] & 2) {	/* EIDE drive */
 		memset(&p, 0, sizeof(p));
 
-		if (speed <= XFER_PIO_2)
-			p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
-		else if ((speed <= XFER_PIO_4) ||
-			 (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
-			p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
-		else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
+		if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
+			if (speed <= XFER_PIO_2)
+				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
+			else if ((speed <= XFER_PIO_4) ||
+				 (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
+				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
+		} else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
 			p.cycle = id[ATA_ID_EIDE_DMA_MIN];
 
 		ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B);
@@ -185,11 +186,10 @@
 	/*
 	 * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
 	 * S.M.A.R.T and some other commands. We have to ensure that the
-	 * DMA cycle timing is slower/equal than the fastest PIO timing.
+	 * DMA cycle timing is slower/equal than the current PIO timing.
 	 */
 	if (speed >= XFER_SW_DMA_0) {
-		u8 pio = ide_get_best_pio_mode(drive, 255, 5);
-		ide_timing_compute(drive, XFER_PIO_0 + pio, &p, T, UT);
+		ide_timing_compute(drive, drive->pio_mode, &p, T, UT);
 		ide_timing_merge(&p, t, t, IDE_TIMING_ALL);
 	}
 
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
index 46d203c..5fc8d5c 100644
--- a/drivers/ide/ide-xfer-mode.c
+++ b/drivers/ide/ide-xfer-mode.c
@@ -58,7 +58,7 @@
  *	This is used by most chipset support modules when "auto-tuning".
  */
 
-u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
+static u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
 {
 	u16 *id = drive->id;
 	int pio_mode = -1, overridden = 0;
@@ -105,7 +105,6 @@
 
 	return pio_mode;
 }
-EXPORT_SYMBOL_GPL(ide_get_best_pio_mode);
 
 int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio)
 {
@@ -135,17 +134,20 @@
 	 * set transfer mode on the device in ->set_pio_mode method...
 	 */
 	if (port_ops->set_dma_mode == NULL) {
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		drive->pio_mode = mode;
+		port_ops->set_pio_mode(hwif, drive);
 		return 0;
 	}
 
 	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		drive->pio_mode = mode;
+		port_ops->set_pio_mode(hwif, drive);
 		return 0;
 	} else {
-		port_ops->set_pio_mode(drive, mode - XFER_PIO_0);
+		drive->pio_mode = mode;
+		port_ops->set_pio_mode(hwif, drive);
 		return ide_config_drive_speed(drive, mode);
 	}
 }
@@ -164,10 +166,12 @@
 	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
 		if (ide_config_drive_speed(drive, mode))
 			return -1;
-		port_ops->set_dma_mode(drive, mode);
+		drive->dma_mode = mode;
+		port_ops->set_dma_mode(hwif, drive);
 		return 0;
 	} else {
-		port_ops->set_dma_mode(drive, mode);
+		drive->dma_mode = mode;
+		port_ops->set_dma_mode(hwif, drive);
 		return ide_config_drive_speed(drive, mode);
 	}
 }
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
index 0d266a5..560e66d 100644
--- a/drivers/ide/it8172.c
+++ b/drivers/ide/it8172.c
@@ -37,12 +37,12 @@
 
 #define DRV_NAME "IT8172"
 
-static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 drive_enables;
 	u32 drive_timing;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * The highest value of DIOR/DIOW pulse width and recovery time
@@ -77,14 +77,14 @@
 	pci_write_config_dword(dev, 0x44, drive_timing);
 }
 
-static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it8172_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int a_speed		= 3 << (drive->dn * 4);
 	int u_flag		= 1 << drive->dn;
 	int u_speed		= 0;
 	u8 reg48, reg4a;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_byte(dev, 0x48, &reg48);
 	pci_read_config_byte(dev, 0x4a, &reg4a);
@@ -98,14 +98,14 @@
 		pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
 		pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
 
-		pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+		drive->pio_mode =
+			mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 
-		it8172_set_pio_mode(drive, pio);
+		it8172_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 4797616..46816ba 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -17,15 +17,14 @@
 
 /**
  *	it8213_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode.
  */
 
-static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= 0x40;
@@ -35,6 +34,7 @@
 	u8 slave_data;
 	static DEFINE_SPINLOCK(tune_lock);
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	static const u8 timings[][2] = {
 					{ 0, 0 },
@@ -74,15 +74,14 @@
 
 /**
  *	it8213_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the ITE chipset for the DMA mode.
  */
 
-static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it8213_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -92,6 +91,7 @@
 	int u_speed		= 0;
 	u16			reg4042, reg4a;
 	u8			reg48, reg54, reg55;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	pci_read_config_byte(dev, 0x48, &reg48);
@@ -120,7 +120,6 @@
 			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
@@ -132,11 +131,12 @@
 			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		it8213_set_pio_mode(drive, pio);
+		it8213_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index 51aa745..b2709c7 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -228,18 +228,18 @@
 
 /**
  *	it821x_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Tune the host to the desired PIO mode taking into the consideration
  *	the maximum PIO mode supported by the other device on the cable.
  */
 
-static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void it821x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
 	ide_drive_t *pair = ide_get_pair_dev(drive);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 unit = drive->dn & 1, set_pio = pio;
 
 	/* Spec says 89 ref driver uses 88 */
@@ -252,7 +252,7 @@
 	 * on the cable.
 	 */
 	if (pair) {
-		u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4);
+		u8 pair_pio = pair->pio_mode - XFER_PIO_0;
 		/* trim PIO to the slowest of the master/slave */
 		if (pair_pio < set_pio)
 			set_pio = pair_pio;
@@ -393,14 +393,16 @@
 
 /**
  *	it821x_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the ITE chipset for the desired DMA mode.
  */
 
-static void it821x_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void it821x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 speed = drive->dma_mode;
+
 	/*
 	 * MWDMA tuning is really hard because our MWDMA and PIO
 	 * timings are kept in the same place.  We can switch in the
diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c
index bf2be64..74c2c4a 100644
--- a/drivers/ide/jmicron.c
+++ b/drivers/ide/jmicron.c
@@ -80,19 +80,19 @@
 	return ATA_CBL_PATA80;
 }
 
-static void jmicron_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
 /**
  *	jmicron_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@mode: DMA mode
  *
  *	As the JMicron snoops for timings we don't need to do anything here.
  */
 
-static void jmicron_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void jmicron_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c
index f1d70d6..1a53a4c 100644
--- a/drivers/ide/opti621.c
+++ b/drivers/ide/opti621.c
@@ -8,77 +8,6 @@
  * Jan Harkes <jaharkes@cwi.nl>,
  * Mark Lord <mlord@pobox.com>
  * Some parts of code are from ali14xx.c and from rz1000.c.
- *
- * OPTi is trademark of OPTi, Octek is trademark of Octek.
- *
- * I used docs from OPTi databook, from ftp.opti.com, file 9123-0002.ps
- * and disassembled/traced setupvic.exe (DOS program).
- * It increases kernel code about 2 kB.
- * I don't have this card no more, but I hope I can get some in case
- * of needed development.
- * My card is Octek PIDE 1.01 (on card) or OPTiViC (program).
- * It has a place for a secondary connector in circuit, but nothing
- * is there. Also BIOS says no address for
- * secondary controller (see bellow in ide_init_opti621).
- * I've only tested this on my system, which only has one disk.
- * It's Western Digital WDAC2850, with PIO mode 3. The PCI bus
- * is at 20 MHz (I have DX2/80, I tried PCI at 40, but I got random
- * lockups). I tried the OCTEK double speed CD-ROM and
- * it does not work! But I can't boot DOS also, so it's probably
- * hardware fault. I have connected Conner 80MB, the Seagate 850MB (no
- * problems) and Seagate 1GB (as slave, WD as master). My experiences
- * with the third, 1GB drive: I got 3MB/s (hdparm), but sometimes
- * it slows to about 100kB/s! I don't know why and I have
- * not this drive now, so I can't try it again.
- * I write this driver because I lost the paper ("manual") with
- * settings of jumpers on the card and I have to boot Linux with
- * Loadlin except LILO, cause I have to run the setupvic.exe program
- * already or I get disk errors (my test: rpm -Vf
- * /usr/X11R6/bin/XF86_SVGA - or any big file).
- * Some numbers from hdparm -t /dev/hda:
- * Timing buffer-cache reads:   32 MB in  3.02 seconds =10.60 MB/sec
- * Timing buffered disk reads:  16 MB in  5.52 seconds = 2.90 MB/sec
- * I have 4 Megs/s before, but I don't know why (maybe changes
- * in hdparm test).
- * After release of 0.1, I got some successful reports, so it might work.
- *
- * The main problem with OPTi is that some timings for master
- * and slave must be the same. For example, if you have master
- * PIO 3 and slave PIO 0, driver have to set some timings of
- * master for PIO 0. Second problem is that opti621_set_pio_mode
- * got only one drive to set, but have to set both drives.
- * This is solved in compute_pios. If you don't set
- * the second drive, compute_pios use ide_get_best_pio_mode
- * for autoselect mode (you can change it to PIO 0, if you want).
- * If you then set the second drive to another PIO, the old value
- * (automatically selected) will be overrided by yours.
- * There is a 25/33MHz switch in configuration
- * register, but driver is written for use at any frequency.
- *
- * Version 0.1, Nov 8, 1996
- * by Jaromir Koutek, for 2.1.8.
- * Initial version of driver.
- *
- * Version 0.2
- * Number 0.2 skipped.
- *
- * Version 0.3, Nov 29, 1997
- * by Mark Lord (probably), for 2.1.68
- * Updates for use with new IDE block driver.
- *
- * Version 0.4, Dec 14, 1997
- * by Jan Harkes
- * Fixed some errors and cleaned the code.
- *
- * Version 0.5, Jan 2, 1998
- * by Jaromir Koutek
- * Updates for use with (again) new IDE block driver.
- * Update of documentation.
- *
- * Version 0.6, Jan 2, 1999
- * by Jaromir Koutek
- * Reversed to version 0.3 of the driver, because
- * 0.5 doesn't work.
  */
 
 #include <linux/types.h>
@@ -133,12 +62,12 @@
 	return ret;
 }
 
-static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void opti621_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *pair = ide_get_pair_dev(drive);
 	unsigned long flags;
-	unsigned long mode = XFER_PIO_0 + pio, pair_mode;
+	unsigned long mode = drive->pio_mode, pair_mode;
+	const u8 pio = mode - XFER_PIO_0;
 	u8 tim, misc, addr_pio = pio, clk;
 
 	/* DRDY is default 2 (by OPTi Databook) */
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
index f8eddf0..9e8f4e1 100644
--- a/drivers/ide/palm_bk3710.c
+++ b/drivers/ide/palm_bk3710.c
@@ -166,7 +166,7 @@
 	writel(val32, base + BK3710_DATRCVR);
 
 	if (mate) {
-		u8 mode2 = ide_get_best_pio_mode(mate, 255, 4);
+		u8 mode2 = mate->pio_mode - XFER_PIO_0;
 
 		if (mode2 < mode)
 			mode = mode2;
@@ -188,10 +188,11 @@
 	writel(val32, base + BK3710_REGRCVR);
 }
 
-static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed)
+static void palm_bk3710_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	int is_slave = drive->dn & 1;
-	void __iomem *base = (void *)drive->hwif->dma_base;
+	void __iomem *base = (void *)hwif->dma_base;
+	const u8 xferspeed = drive->dma_mode;
 
 	if (xferspeed >= XFER_UDMA_0) {
 		palm_bk3710_setudmamode(base, is_slave,
@@ -203,12 +204,13 @@
 	}
 }
 
-static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio)
+static void palm_bk3710_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	unsigned int cycle_time;
 	int is_slave = drive->dn & 1;
 	ide_drive_t *mate;
-	void __iomem *base = (void *)drive->hwif->dma_base;
+	void __iomem *base = (void *)hwif->dma_base;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * Obtain the drive PIO data for tuning the Palm Chip registers
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 65ba823..9546fe2 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -129,11 +129,11 @@
 	{ 0x1a, 0x01, 0xcb },	/* UDMA mode 6 */
 };
 
-static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void pdcnew_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 adj			= (drive->dn & 1) ? 0x08 : 0x00;
+	const u8 speed		= drive->dma_mode;
 
 	/*
 	 * IDE core issues SETFEATURES_XFER to the drive first (thanks to
@@ -167,11 +167,11 @@
  	}
 }
 
-static void pdcnew_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pdcnew_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u8 adj = (drive->dn & 1) ? 0x08 : 0x00;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	if (max_dma_rate(dev) == 4) {
 		set_indexed_reg(hwif, 0x0c + adj, pio_timings[pio].reg0c);
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 35161dd..c5f3841 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -1,7 +1,7 @@
 /*
  *  Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 2006-2007, 2009	MontaVista Software, Inc.
- *  Copyright (C) 2007			Bartlomiej Zolnierkiewicz
+ *  Copyright (C) 2007-2010		Bartlomiej Zolnierkiewicz
  *
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
  *  Author: Frank Tiernan (frankt@promise.com)
@@ -21,23 +21,15 @@
 
 #define DRV_NAME "pdc202xx_old"
 
-static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
-
-static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
+static void pdc202xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 drive_pci		= 0x60 + (drive->dn << 2);
+	const u8 speed		= drive->dma_mode;
 
 	u8			AP = 0, BP = 0, CP = 0;
 	u8			TA = 0, TB = 0, TC = 0;
 
-	/*
-	 * TODO: do this once per channel
-	 */
-	if (dev->device != PCI_DEVICE_ID_PROMISE_20246)
-		pdc_old_disable_66MHz_clock(hwif);
-
 	pci_read_config_byte(dev, drive_pci,     &AP);
 	pci_read_config_byte(dev, drive_pci + 1, &BP);
 	pci_read_config_byte(dev, drive_pci + 2, &CP);
@@ -84,9 +76,10 @@
 	}
 }
 
-static void pdc202xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	pdc202xx_set_mode(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	pdc202xx_set_mode(hwif, drive);
 }
 
 static int pdc202xx_test_irq(ide_hwif_t *hwif)
@@ -100,13 +93,13 @@
 		 * bit 7: error, bit 6: interrupting,
 		 * bit 5: FIFO full, bit 4: FIFO empty
 		 */
-		return ((sc1d & 0x50) == 0x40) ? 1 : 0;
+		return ((sc1d & 0x50) == 0x50) ? 1 : 0;
 	} else	{
 		/*
 		 * bit 3: error, bit 2: interrupting,
 		 * bit 1: FIFO full, bit 0: FIFO empty
 		 */
-		return ((sc1d & 0x05) == 0x04) ? 1 : 0;
+		return ((sc1d & 0x05) == 0x05) ? 1 : 0;
 	}
 }
 
@@ -145,6 +138,11 @@
 	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
 }
 
+static void pdc2026x_init_hwif(ide_hwif_t *hwif)
+{
+	pdc_old_disable_66MHz_clock(hwif);
+}
+
 static void pdc202xx_dma_start(ide_drive_t *drive)
 {
 	if (drive->current_speed > XFER_UDMA_2)
@@ -261,6 +259,7 @@
 	{ \
 		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_pdc202xx, \
+		.init_hwif	= pdc2026x_init_hwif, \
 		.port_ops	= &pdc2026x_port_ops, \
 		.dma_ops	= &pdc2026x_dma_ops, \
 		.host_flags	= IDE_HFLAGS_PDC202XX, \
@@ -356,6 +355,6 @@
 module_init(pdc202xx_ide_init);
 module_exit(pdc202xx_ide_exit);
 
-MODULE_AUTHOR("Andre Hedrick, Frank Tiernan");
+MODULE_AUTHOR("Andre Hedrick, Frank Tiernan, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("PCI driver module for older Promise IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index bf14f39..1bdca49 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -59,15 +59,14 @@
 
 /**
  *	piix_set_pio_mode	-	set host controller for PIO mode
+ *	@port: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Set the interface PIO mode based upon the settings done by AMI BIOS.
  */
 
-static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -77,6 +76,7 @@
 	u8 slave_data;
 	static DEFINE_SPINLOCK(tune_lock);
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 				     /* ISP  RTC */
 	static const u8 timings[][2]= {
@@ -127,16 +127,15 @@
 
 /**
  *	piix_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Set a PIIX host controller to the desired DMA mode.  This involves
  *	programming the right timing data into the PCI configuration space.
  */
 
-static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void piix_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -147,6 +146,7 @@
 	int			sitre;
 	u16			reg4042, reg4a;
 	u8			reg48, reg54, reg55;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	sitre = (reg4042 & 0x4000) ? 1 : 0;
@@ -176,7 +176,6 @@
 			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
@@ -188,11 +187,12 @@
 			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		piix_set_pio_mode(drive, pio);
+		piix_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 7a4e788..850ee45 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -496,12 +496,11 @@
 /*
  * Old tuning functions (called on hdparm -p), sets up drive PIO timings
  */
-static void
-pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void pmac_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio);
 	u32 *timings, t;
 	unsigned accessTicks, recTicks;
@@ -778,14 +777,14 @@
 #endif	
 }
 
-static void pmac_ide_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void pmac_ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
 	int ret = 0;
 	u32 *timings, *timings2, tl[2];
 	u8 unit = drive->dn & 1;
+	const u8 speed = drive->dma_mode;
 
 	timings = &pmif->timings[unit];
 	timings2 = &pmif->timings[unit+2];
@@ -1651,8 +1650,8 @@
 		if ((status & FLUSH) == 0)
 			break;
 		if (++timeout > 100) {
-			printk(KERN_WARNING "ide%d, ide_dma_test_irq \
-			timeout flushing channel\n", hwif->index);
+			printk(KERN_WARNING "ide%d, ide_dma_test_irq timeout flushing channel\n",
+			       hwif->index);
 			break;
 		}
 	}	
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index 74696ed..3f0244f 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -189,15 +189,13 @@
 	printk(KERN_DEBUG "%s: %#x\n", drive->name, timing);
 }
 
-static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void qd6500_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	u16 *id = drive->id;
 	int active_time   = 175;
 	int recovery_time = 415; /* worst case values from the dos driver */
 
-	/*
-	 * FIXME: use "pio" value
-	 */
+	/* FIXME: use drive->pio_mode value */
 	if (!qd_find_disk_type(drive, &active_time, &recovery_time) &&
 	    (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) &&
 	    id[ATA_ID_EIDE_PIO] >= 240) {
@@ -211,9 +209,9 @@
 				active_time, recovery_time));
 }
 
-static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void qd6580_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 	unsigned int cycle_time;
 	int active_time   = 175;
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index d467478..134f1fd 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -122,13 +122,13 @@
 	return mask;
 }
 
-static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void sc1200_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t		*hwif = drive->hwif;
 	struct pci_dev		*dev = to_pci_dev(hwif->dev);
 	unsigned int		reg, timings;
 	unsigned short		pci_clock;
 	unsigned int		basereg = hwif->channel ? 0x50 : 0x40;
+	const u8		mode = drive->dma_mode;
 
 	static const u32 udma_timing[3][3] = {
 		{ 0x00921250, 0x00911140, 0x00911030 },
@@ -193,10 +193,10 @@
  * will have valid default PIO timings set up before we get here.
  */
 
-static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sc1200_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t	*hwif = drive->hwif;
 	int		mode = -1;
+	const u8	pio = drive->pio_mode - XFER_PIO_0;
 
 	/*
 	 * bad abuse of ->set_pio_mode interface
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 1104bb3..b7f5b0c 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -199,16 +199,15 @@
 
 /**
  *	scc_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -216,6 +215,7 @@
 	unsigned long pioct_port = ctl_base + 0x004;
 	unsigned long reg;
 	int offset;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	reg = in_be32((void __iomem *)cckctrl_port);
 	if (reg & CCKCTRL_ATACLKOEN) {
@@ -231,16 +231,15 @@
 
 /**
  *	scc_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void scc_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -254,6 +253,7 @@
 	int offset, idx;
 	unsigned long reg;
 	unsigned long jcactsel;
+	const u8 speed = drive->dma_mode;
 
 	reg = in_be32((void __iomem *)cckctrl_port);
 	if (reg & CCKCTRL_ATACLKOEN) {
@@ -872,20 +872,18 @@
 	.remove = __devexit_p(scc_remove),
 };
 
-static int scc_ide_init(void)
+static int __init scc_ide_init(void)
 {
 	return ide_pci_register_driver(&scc_pci_driver);
 }
 
-module_init(scc_ide_init);
-/* -- No exit code?
-static void scc_ide_exit(void)
+static void __exit scc_ide_exit(void)
 {
-	ide_pci_unregister_driver(&scc_pci_driver);
+	pci_unregister_driver(&scc_pci_driver);
 }
-module_exit(scc_ide_exit);
- */
 
+module_init(scc_ide_init);
+module_exit(scc_ide_exit);
 
 MODULE_DESCRIPTION("PCI driver module for Toshiba SCC IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index b6554ef..35fb8da 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -2,7 +2,7 @@
  * Copyright (C) 1998-2000 Michel Aubry
  * Copyright (C) 1998-2000 Andrzej Krzysztofowicz
  * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
- * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
+ * Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
  * Portions copyright (c) 2001 Sun Microsystems
  *
  *
@@ -52,8 +52,6 @@
 	NULL
 };
 
-static struct pci_dev *isa_dev;
-
 static int check_in_drive_lists (ide_drive_t *drive, const char **list)
 {
 	char *m = (char *)&drive->id[ATA_ID_PROD];
@@ -67,26 +65,14 @@
 static u8 svwks_udma_filter(ide_drive_t *drive)
 {
 	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u8 mask = 0;
 
-	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE)
+	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) {
 		return 0x1f;
-	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) {
-		u32 reg = 0;
-		if (isa_dev)
-			pci_read_config_dword(isa_dev, 0x64, &reg);
-			
-		/*
-		 *	Don't enable UDMA on disk devices for the moment
-		 */
-		if(drive->media == ide_disk)
-			return 0;
-		/* Check the OSB4 DMA33 enable bit */
-		return ((reg & 0x00004000) == 0x00004000) ? 0x07 : 0;
 	} else if (dev->revision < SVWKS_CSB5_REVISION_NEW) {
 		return 0x07;
-	} else if (dev->revision >= SVWKS_CSB5_REVISION_NEW) {
-		u8 btr = 0, mode;
+	} else {
+		u8 btr = 0, mode, mask;
+
 		pci_read_config_byte(dev, 0x5A, &btr);
 		mode = btr & 0x3;
 
@@ -101,13 +87,9 @@
 		case 1:	 mask = 0x07; break;
 		default: mask = 0x00; break;
 		}
-	}
-	if (((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
-	     (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) &&
-	    (!(PCI_FUNC(dev->devfn) & 1)))
-		mask = 0x1f;
 
-	return mask;
+		return mask;
+	}
 }
 
 static u8 svwks_csb_check (struct pci_dev *dev)
@@ -124,12 +106,13 @@
 	return 0;
 }
 
-static void svwks_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 pio_modes[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 };
 	static const u8 drive_pci[] = { 0x41, 0x40, 0x43, 0x42 };
 
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev = to_pci_dev(hwif->dev);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
 
@@ -145,14 +128,14 @@
 	}
 }
 
-static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 udma_modes[]		= { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 };
 	static const u8 dma_modes[]		= { 0x77, 0x21, 0x20 };
 	static const u8 drive_pci2[]		= { 0x45, 0x44, 0x47, 0x46 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	const u8 speed		= drive->dma_mode;
 	u8 unit			= drive->dn & 1;
 
 	u8 ultra_enable	 = 0, ultra_timing = 0, dma_timing = 0;
@@ -185,8 +168,9 @@
 
 	/* OSB4 : South Bridge and IDE */
 	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) {
-		isa_dev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
-			  PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL);
+		struct pci_dev *isa_dev =
+			pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
+					PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL);
 		if (isa_dev) {
 			pci_read_config_dword(isa_dev, 0x64, &reg);
 			reg &= ~0x00002000; /* disable 600ns interrupt mask */
@@ -195,6 +179,7 @@
 					"enabled.\n", pci_name(dev));
 			reg |=  0x00004000; /* enable UDMA/33 support */
 			pci_write_config_dword(isa_dev, 0x64, reg);
+			pci_dev_put(isa_dev);
 		}
 	}
 
@@ -343,7 +328,6 @@
 static const struct ide_port_ops osb4_port_ops = {
 	.set_pio_mode		= svwks_set_pio_mode,
 	.set_dma_mode		= svwks_set_dma_mode,
-	.udma_filter		= svwks_udma_filter,
 };
 
 static const struct ide_port_ops svwks_port_ops = {
@@ -460,6 +444,6 @@
 module_init(svwks_ide_init);
 module_exit(svwks_ide_exit);
 
-MODULE_AUTHOR("Michael Aubry. Andrzej Krzysztofowicz, Andre Hedrick");
+MODULE_AUTHOR("Michael Aubry. Andrzej Krzysztofowicz, Andre Hedrick, Bartlomiej Zolnierkiewicz");
 MODULE_DESCRIPTION("PCI driver module for Serverworks OSB4/CSB5/CSB6 IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index b7d61dc..e3ea591 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -255,7 +255,7 @@
 	return dma_stat;
 }
 
-static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sgiioc4_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 }
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index d95df52..ddeda44 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -229,19 +229,18 @@
 
 /**
  *	sil_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	Load the timing settings for this device mode into the
  *	controller.
  */
 
-static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
+static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u16 tf_speed[]   = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
 	static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	ide_drive_t *pair	= ide_get_pair_dev(drive);
 	u32 speedt		= 0;
@@ -249,6 +248,7 @@
 	unsigned long addr	= siimage_seldev(drive, 0x04);
 	unsigned long tfaddr	= siimage_selreg(hwif,	0x02);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
+	const u8 pio		= drive->pio_mode - XFER_PIO_0;
 	u8 tf_pio		= pio;
 	u8 mmio			= (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
 	u8 addr_mask		= hwif->channel ? (mmio ? 0xF4 : 0x84)
@@ -258,7 +258,7 @@
 
 	/* trim *taskfile* PIO to the slowest of the master/slave */
 	if (pair) {
-		u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4);
+		u8 pair_pio = pair->pio_mode - XFER_PIO_0;
 
 		if (pair_pio < tf_pio)
 			tf_pio = pair_pio;
@@ -289,19 +289,18 @@
 
 /**
  *	sil_set_dma_mode	-	set host controller for DMA mode
+ *	@hwif: port
  *	@drive: drive
- *	@speed: DMA mode
  *
  *	Tune the SiI chipset for the desired DMA mode.
  */
 
-static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sil_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static const u8 ultra6[] = { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 };
 	static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
 	static const u16 dma[]	 = { 0x2208, 0x10C2, 0x10C1 };
 
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u16 ultra = 0, multi	= 0;
@@ -311,6 +310,7 @@
 						: (mmio ? 0xB4 : 0x80);
 	unsigned long ma	= siimage_seldev(drive, 0x08);
 	unsigned long ua	= siimage_seldev(drive, 0x0C);
+	const u8 speed		= drive->dma_mode;
 
 	scsc  = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A));
 	mode  = sil_ioread8 (dev, base + addr_mask);
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index 4687060..db7f4e7 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -290,10 +290,10 @@
 		pci_write_config_byte(dev, 0x4b, rw_prefetch);
 }
 
-static void sis_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sis_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	config_drive_art_rwp(drive);
-	sis_program_timings(drive, XFER_PIO_0 + pio);
+	sis_program_timings(drive, drive->pio_mode);
 }
 
 static void sis_ata133_program_udma_timings(ide_drive_t *drive, const u8 mode)
@@ -340,8 +340,10 @@
 		sis_ata33_program_udma_timings(drive, mode);
 }
 
-static void sis_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sis_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
+	const u8 speed = drive->dma_mode;
+
 	if (speed >= XFER_UDMA_0)
 		sis_program_udma_timings(drive, speed);
 	else
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 3c2bbf0..f21dc2a 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -63,12 +63,13 @@
 /*
  * Configure the chipset for PIO mode.
  */
-static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	struct pci_dev *dev	= to_pci_dev(drive->hwif->dev);
+	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long timings	= (unsigned long)ide_get_drivedata(drive);
 	int reg			= 0x44 + drive->dn * 4;
 	u16 drv_ctrl;
+	const u8 pio		= drive->pio_mode - XFER_PIO_0;
 
 	drv_ctrl = get_pio_timings(drive, pio);
 
@@ -91,11 +92,12 @@
 /*
  * Configure the chipset for DMA mode.
  */
-static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void sl82c105_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
 	static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200};
 	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
 	u16 drv_ctrl;
+	const u8 speed = drive->dma_mode;
 
 	drv_ctrl = mwdma_timings[speed - XFER_MW_DMA_0];
 
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 1ccfb40..864ffe0 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -18,9 +18,8 @@
 
 static DEFINE_SPINLOCK(slc90e66_lock);
 
-static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -29,6 +28,8 @@
 	u16 master_data;
 	u8 slave_data;
 	int control = 0;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
+
 				     /* ISP  RTC */
 	static const u8 timings[][2] = {
 					{ 0, 0 },
@@ -71,14 +72,14 @@
 	spin_unlock_irqrestore(&slc90e66_lock, flags);
 }
 
-static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
+static void slc90e66_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int sitre = 0, a_speed	= 7 << (drive->dn * 4);
 	int u_speed = 0, u_flag = 1 << drive->dn;
 	u16			reg4042, reg44, reg48, reg4a;
+	const u8 speed		= drive->dma_mode;
 
 	pci_read_config_word(dev, maslave, &reg4042);
 	sitre = (reg4042 & 0x4000) ? 1 : 0;
@@ -98,7 +99,6 @@
 		}
 	} else {
 		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-		u8 pio;
 
 		if (reg48 & u_flag)
 			pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
@@ -106,11 +106,12 @@
 			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
 
 		if (speed >= XFER_MW_DMA_0)
-			pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+			drive->pio_mode =
+				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
 		else
-			pio = 2; /* only SWDMA2 is allowed */
+			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
 
-		slc90e66_set_pio_mode(drive, pio);
+		slc90e66_set_pio_mode(hwif, drive);
 	}
 }
 
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index 05a93d6..e444d24 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -13,11 +13,11 @@
 
 #define DRV_NAME "tc86c001"
 
-static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
+static void tc86c001_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long scr_port	= hwif->config_data + (drive->dn ? 0x02 : 0x00);
 	u16 mode, scr		= inw(scr_port);
+	const u8 speed		= drive->dma_mode;
 
 	switch (speed) {
 	case XFER_UDMA_4:	mode = 0x00c0; break;
@@ -41,9 +41,10 @@
 	outw(scr, scr_port);
 }
 
-static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	tc86c001_set_mode(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	tc86c001_set_mode(hwif, drive);
 }
 
 /*
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index 8773c3b..7953447 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -34,9 +34,8 @@
 
 #define DRV_NAME "triflex"
 
-static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
+static void triflex_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u32 triflex_timings = 0;
 	u16 timing = 0;
@@ -44,7 +43,7 @@
 
 	pci_read_config_dword(dev, channel_offset, &triflex_timings);
 
-	switch(speed) {
+	switch (drive->dma_mode) {
 		case XFER_MW_DMA_2:
 			timing = 0x0103; 
 			break;
@@ -82,9 +81,10 @@
 	pci_write_config_dword(dev, channel_offset, triflex_timings);
 }
 
-static void triflex_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	triflex_set_mode(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	triflex_set_mode(hwif, drive);
 }
 
 static const struct ide_port_ops triflex_port_ops = {
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
index fd59c0d..1d80f1f 100644
--- a/drivers/ide/tx4938ide.c
+++ b/drivers/ide/tx4938ide.c
@@ -56,16 +56,15 @@
 		     &tx4938_ebuscptr->cr[ebus_ch]);
 }
 
-static void tx4938ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tx4938ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	struct tx4938ide_platform_info *pdata = hwif->dev->platform_data;
-	u8 safe = pio;
+	u8 safe = drive->pio_mode - XFER_PIO_0;
 	ide_drive_t *pair;
 
 	pair = ide_get_pair_dev(drive);
 	if (pair)
-		safe = min(safe, ide_get_best_pio_mode(pair, 255, 5));
+		safe = min(safe, pair->pio_mode - XFER_PIO_0);
 	tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, safe);
 }
 
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 64b58ec..3c73677 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -104,17 +104,17 @@
 
 #define TX4939IDE_BASE(hwif)	((void __iomem *)(hwif)->extra_base)
 
-static void tx4939ide_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	int is_slave = drive->dn;
 	u32 mask, val;
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 	u8 safe = pio;
 	ide_drive_t *pair;
 
 	pair = ide_get_pair_dev(drive);
 	if (pair)
-		safe = min(safe, ide_get_best_pio_mode(pair, 255, 4));
+		safe = min(safe, pair->pio_mode - XFER_PIO_0);
 	/*
 	 * Update Command Transfer Mode for master/slave and Data
 	 * Transfer Mode for this drive.
@@ -125,10 +125,10 @@
 	/* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */
 }
 
-static void tx4939ide_set_dma_mode(ide_drive_t *drive, const u8 mode)
+static void tx4939ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	u32 mask, val;
+	const u8 mode = drive->dma_mode;
 
 	/* Update Data Transfer Mode for this drive. */
 	if (mode >= XFER_UDMA_0)
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 60f936e..47adcd0 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -104,10 +104,11 @@
 		speeds[0], speeds[1], speeds[2], speeds[3]);
 }
 
-static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void umc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate;
+	ide_hwif_t *mate = hwif->mate;
 	unsigned long uninitialized_var(flags);
+	const u8 pio = drive->pio_mode - XFER_PIO_0;
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 028de26..e65d010 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -6,7 +6,7 @@
  *   vt8235, vt8237, vt8237a
  *
  * Copyright (c) 2000-2002 Vojtech Pavlik
- * Copyright (c) 2007 Bartlomiej Zolnierkiewicz
+ * Copyright (c) 2007-2010 Bartlomiej Zolnierkiewicz
  *
  * Based on the work of:
  *	Michel Aubry
@@ -54,6 +54,11 @@
 #define VIA_NO_UNMASK		0x08 /* Doesn't work with IRQ unmasking on */
 #define VIA_BAD_ID		0x10 /* Has wrong vendor ID (0x1107) */
 #define VIA_BAD_AST		0x20 /* Don't touch Address Setup Timing */
+#define VIA_SATA_PATA		0x80 /* SATA/PATA combined configuration */
+
+enum {
+	VIA_IDFLAG_SINGLE = (1 << 1), /* single channel controller */
+};
 
 /*
  * VIA SouthBridge chips.
@@ -67,11 +72,13 @@
 	u8 udma_mask;
 	u8 flags;
 } via_isa_bridges[] = {
-	{ "vx855",	PCI_DEVICE_ID_VIA_VX855,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+	{ "vx855",	PCI_DEVICE_ID_VIA_VX855,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
+	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
+	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
+	{ "vt8261",	PCI_DEVICE_ID_VIA_8261,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8237s",	PCI_DEVICE_ID_VIA_8237S,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt6410",	PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
+	{ "vt6415",	PCI_DEVICE_ID_VIA_6410,     0x00, 0xff, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8251",	PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8237",	PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ "vt8237a",	PCI_DEVICE_ID_VIA_8237A,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
@@ -92,6 +99,7 @@
 	{ "vt82c586",	PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f,      0x00, VIA_SET_FIFO },
 	{ "vt82c576",	PCI_DEVICE_ID_VIA_82C576,   0x00, 0x2f,      0x00, VIA_SET_FIFO | VIA_NO_UNMASK },
 	{ "vt82c576",	PCI_DEVICE_ID_VIA_82C576,   0x00, 0x2f,      0x00, VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID },
+	{ "vtxxxx",	PCI_DEVICE_ID_VIA_ANON,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
 	{ NULL }
 };
 
@@ -102,6 +110,7 @@
 {
 	struct via_isa_bridge *via_config;
 	unsigned int via_80w;
+	u8 cached_device[2];
 };
 
 /**
@@ -137,30 +146,45 @@
 	case ATA_UDMA4: t = timing->udma ? (0xe8 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x0f; break;
 	case ATA_UDMA5: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
 	case ATA_UDMA6: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
-	default: return;
 	}
 
-	pci_write_config_byte(dev, VIA_UDMA_TIMING + (3 - dn), t);
+	/* Set UDMA unless device is not UDMA capable */
+	if (vdev->via_config->udma_mask) {
+		u8 udma_etc;
+
+		pci_read_config_byte(dev, VIA_UDMA_TIMING + 3 - dn, &udma_etc);
+
+		/* clear transfer mode bit */
+		udma_etc &= ~0x20;
+
+		if (timing->udma) {
+			/* preserve 80-wire cable detection bit */
+			udma_etc &= 0x10;
+			udma_etc |= t;
+		}
+
+		pci_write_config_byte(dev, VIA_UDMA_TIMING + 3 - dn, udma_etc);
+	}
 }
 
 /**
  *	via_set_drive		-	configure transfer mode
+ *	@hwif: port
  *	@drive: Drive to set up
- *	@speed: desired speed
  *
  *	via_set_drive() computes timing values configures the chipset to
  *	a desired transfer mode.  It also can be called by upper layers.
  */
 
-static void via_set_drive(ide_drive_t *drive, const u8 speed)
+static void via_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = drive->hwif;
 	ide_drive_t *peer = ide_get_pair_dev(drive);
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	struct ide_host *host = pci_get_drvdata(dev);
 	struct via82cxxx_dev *vdev = host->host_priv;
 	struct ide_timing t, p;
 	unsigned int T, UT;
+	const u8 speed = drive->dma_mode;
 
 	T = 1000000000 / via_clock;
 
@@ -175,7 +199,7 @@
 	ide_timing_compute(drive, speed, &t, T, UT);
 
 	if (peer) {
-		ide_timing_compute(peer, peer->current_speed, &p, T, UT);
+		ide_timing_compute(peer, peer->pio_mode, &p, T, UT);
 		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
 	}
 
@@ -184,22 +208,24 @@
 
 /**
  *	via_set_pio_mode	-	set host controller for PIO mode
+ *	@hwif: port
  *	@drive: drive
- *	@pio: PIO mode number
  *
  *	A callback from the upper layers for PIO-only tuning.
  */
 
-static void via_set_pio_mode(ide_drive_t *drive, const u8 pio)
+static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
 {
-	via_set_drive(drive, XFER_PIO_0 + pio);
+	drive->dma_mode = drive->pio_mode;
+	via_set_drive(hwif, drive);
 }
 
 static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
 {
 	struct via_isa_bridge *via_config;
 
-	for (via_config = via_isa_bridges; via_config->id; via_config++)
+	for (via_config = via_isa_bridges;
+	     via_config->id != PCI_DEVICE_ID_VIA_ANON; via_config++)
 		if ((*isa = pci_get_device(PCI_VENDOR_ID_VIA +
 			!!(via_config->flags & VIA_BAD_ID),
 			via_config->id, NULL))) {
@@ -362,6 +388,9 @@
 	if (via_cable_override(pdev))
 		return ATA_CBL_PATA40_SHORT;
 
+	if ((vdev->via_config->flags & VIA_SATA_PATA) && hwif->channel == 0)
+		return ATA_CBL_SATA;
+
 	if ((vdev->via_80w >> hwif->channel) & 1)
 		return ATA_CBL_PATA80;
 	else
@@ -374,10 +403,66 @@
 	.cable_detect		= via82cxxx_cable_detect,
 };
 
+static void via_write_devctl(ide_hwif_t *hwif, u8 ctl)
+{
+	struct via82cxxx_dev *vdev = hwif->host->host_priv;
+
+	outb(ctl, hwif->io_ports.ctl_addr);
+	outb(vdev->cached_device[hwif->channel], hwif->io_ports.device_addr);
+}
+
+static void __via_dev_select(ide_drive_t *drive, u8 select)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct via82cxxx_dev *vdev = hwif->host->host_priv;
+
+	outb(select, hwif->io_ports.device_addr);
+	vdev->cached_device[hwif->channel] = select;
+}
+
+static void via_dev_select(ide_drive_t *drive)
+{
+	__via_dev_select(drive, drive->select | ATA_DEVICE_OBS);
+}
+
+static void via_tf_load(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid)
+{
+	ide_hwif_t *hwif = drive->hwif;
+	struct ide_io_ports *io_ports = &hwif->io_ports;
+
+	if (valid & IDE_VALID_FEATURE)
+		outb(tf->feature, io_ports->feature_addr);
+	if (valid & IDE_VALID_NSECT)
+		outb(tf->nsect, io_ports->nsect_addr);
+	if (valid & IDE_VALID_LBAL)
+		outb(tf->lbal, io_ports->lbal_addr);
+	if (valid & IDE_VALID_LBAM)
+		outb(tf->lbam, io_ports->lbam_addr);
+	if (valid & IDE_VALID_LBAH)
+		outb(tf->lbah, io_ports->lbah_addr);
+	if (valid & IDE_VALID_DEVICE)
+		__via_dev_select(drive, tf->device);
+}
+
+const struct ide_tp_ops via_tp_ops = {
+	.exec_command		= ide_exec_command,
+	.read_status		= ide_read_status,
+	.read_altstatus		= ide_read_altstatus,
+	.write_devctl		= via_write_devctl,
+
+	.dev_select		= via_dev_select,
+	.tf_load		= via_tf_load,
+	.tf_read		= ide_tf_read,
+
+	.input_data		= ide_input_data,
+	.output_data		= ide_output_data,
+};
+
 static const struct ide_port_info via82cxxx_chipset __devinitdata = {
 	.name		= DRV_NAME,
 	.init_chipset	= init_chipset_via82cxxx,
 	.enablebits	= { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } },
+	.tp_ops		= &via_tp_ops,
 	.port_ops	= &via_port_ops,
 	.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
 			  IDE_HFLAG_POST_SET_MODE |
@@ -402,11 +487,6 @@
 	 * Find the ISA bridge and check we know what it is.
 	 */
 	via_config = via_config_find(&isa);
-	if (!via_config->id) {
-		printk(KERN_WARNING DRV_NAME " %s: unknown chipset, skipping\n",
-			pci_name(dev));
-		return -ENODEV;
-	}
 
 	/*
 	 * Print the boot message.
@@ -436,10 +516,13 @@
 		via_clock = 33333;
 	}
 
-	if (idx == 0)
-		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
-	else
+	if (idx == 1)
 		d.enablebits[1].reg = d.enablebits[0].reg = 0;
+	else
+		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
+
+	if (idx == VIA_IDFLAG_SINGLE)
+		d.host_flags |= IDE_HFLAG_SINGLE;
 
 	if ((via_config->flags & VIA_NO_UNMASK) == 0)
 		d.host_flags |= IDE_HFLAG_UNMASK_IRQS;
@@ -475,8 +558,9 @@
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1),  0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1),  0 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), 0 },
+	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), VIA_IDFLAG_SINGLE },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410),      1 },
+	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6415),      1 },
 	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 },
 	{ 0, },
 };
@@ -504,6 +588,6 @@
 module_init(via_ide_init);
 module_exit(via_ide_exit);
 
-MODULE_AUTHOR("Vojtech Pavlik, Michel Aubry, Jeff Garzik, Andre Hedrick");
+MODULE_AUTHOR("Vojtech Pavlik, Bartlomiej Zolnierkiewicz, Michel Aubry, Jeff Garzik, Andre Hedrick");
 MODULE_DESCRIPTION("PCI driver module for VIA IDE");
 MODULE_LICENSE("GPL");
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index e54d9ac..a078e56 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -146,7 +146,7 @@
 void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
-					int is_async, int *fd);
+					int is_async);
 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
 
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 112d397..f71cf13 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -301,10 +301,15 @@
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
-	filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+	ret = get_unused_fd();
+	if (ret < 0)
+		goto err_free;
+	resp.async_fd = ret;
+
+	filp = ib_uverbs_alloc_event_file(file, 1);
 	if (IS_ERR(filp)) {
 		ret = PTR_ERR(filp);
-		goto err_free;
+		goto err_fd;
 	}
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
@@ -332,9 +337,11 @@
 	return in_len;
 
 err_file:
-	put_unused_fd(resp.async_fd);
 	fput(filp);
 
+err_fd:
+	put_unused_fd(resp.async_fd);
+
 err_free:
 	ibdev->dealloc_ucontext(ucontext);
 
@@ -715,6 +722,7 @@
 	struct ib_uverbs_create_comp_channel	   cmd;
 	struct ib_uverbs_create_comp_channel_resp  resp;
 	struct file				  *filp;
+	int ret;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -722,9 +730,16 @@
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
-	if (IS_ERR(filp))
+	ret = get_unused_fd();
+	if (ret < 0)
+		return ret;
+	resp.fd = ret;
+
+	filp = ib_uverbs_alloc_event_file(file, 0);
+	if (IS_ERR(filp)) {
+		put_unused_fd(resp.fd);
 		return PTR_ERR(filp);
+	}
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index ff59a79..4fa2e65 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -484,11 +484,10 @@
 }
 
 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
-					int is_async, int *fd)
+					int is_async)
 {
 	struct ib_uverbs_event_file *ev_file;
 	struct file *filp;
-	int ret;
 
 	ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
 	if (!ev_file)
@@ -503,27 +502,12 @@
 	ev_file->is_async    = is_async;
 	ev_file->is_closed   = 0;
 
-	*fd = get_unused_fd();
-	if (*fd < 0) {
-		ret = *fd;
-		goto err;
-	}
-
-	filp = anon_inode_getfile("[uverbs-event]", &uverbs_event_fops,
+	filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
 				  ev_file, O_RDONLY);
-	if (!filp) {
-		ret = -ENFILE;
-		goto err_fd;
-	}
+	if (IS_ERR(filp))
+		kfree(ev_file);
 
 	return filp;
-
-err_fd:
-	put_unused_fd(*fd);
-
-err:
-	kfree(ev_file);
-	return ERR_PTR(ret);
 }
 
 /*
diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
index 0a6601c..d189e47 100644
--- a/drivers/pcmcia/Kconfig
+++ b/drivers/pcmcia/Kconfig
@@ -51,17 +51,23 @@
 
 config PCMCIA_IOCTL
 	bool "PCMCIA control ioctl (obsolete)"
-	depends on PCMCIA
+	depends on PCMCIA && ARM && !SMP && !PREEMPT
 	default y
 	help
 	  If you say Y here, the deprecated ioctl interface to the PCMCIA
-	  subsystem will be built. It is needed by cardmgr and cardctl
-	  (pcmcia-cs) to function properly.
+	  subsystem will be built. It is needed by the deprecated pcmcia-cs
+	  tools (cardmgr, cardctl) to function properly.
 
 	  You should use the new pcmciautils package instead (see
 	  <file:Documentation/Changes> for location and details).
 
-	  If unsure, say Y.
+	  This config option will most likely be removed from kernel 2.6.35,
+	  the associated code from kernel 2.6.36.
+
+	  As the PCMCIA ioctl is not locking safe, it depends on !SMP and
+	  !PREEMPT.
+
+	  If unsure, say N.
 
 config CARDBUS
 	bool "32-bit CardBus support"
diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index ac0686e..e6ab2a4 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -71,7 +71,7 @@
 	unsigned int max, pass;
 
 	s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0));
-	pci_fixup_cardbus(bus); 
+	pci_fixup_cardbus(bus);
 
 	max = bus->secondary;
 	for (pass = 0; pass < 2; pass++)
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 2f3622d..f230f65 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -54,46 +54,44 @@
 /* Upper limit on reasonable # of tuples */
 #define MAX_TUPLES		200
 
-/*====================================================================*/
-
-/* Parameters that can be set with 'insmod' */
-
 /* 16-bit CIS? */
 static int cis_width;
 module_param(cis_width, int, 0444);
 
 void release_cis_mem(struct pcmcia_socket *s)
 {
-    mutex_lock(&s->ops_mutex);
-    if (s->cis_mem.flags & MAP_ACTIVE) {
-	s->cis_mem.flags &= ~MAP_ACTIVE;
-	s->ops->set_mem_map(s, &s->cis_mem);
-	if (s->cis_mem.res) {
-	    release_resource(s->cis_mem.res);
-	    kfree(s->cis_mem.res);
-	    s->cis_mem.res = NULL;
+	mutex_lock(&s->ops_mutex);
+	if (s->cis_mem.flags & MAP_ACTIVE) {
+		s->cis_mem.flags &= ~MAP_ACTIVE;
+		s->ops->set_mem_map(s, &s->cis_mem);
+		if (s->cis_mem.res) {
+			release_resource(s->cis_mem.res);
+			kfree(s->cis_mem.res);
+			s->cis_mem.res = NULL;
+		}
+		iounmap(s->cis_virt);
+		s->cis_virt = NULL;
 	}
-	iounmap(s->cis_virt);
-	s->cis_virt = NULL;
-    }
-    mutex_unlock(&s->ops_mutex);
+	mutex_unlock(&s->ops_mutex);
 }
 
-/*
- * Map the card memory at "card_offset" into virtual space.
+/**
+ * set_cis_map() - map the card memory at "card_offset" into virtual space.
+ *
  * If flags & MAP_ATTRIB, map the attribute space, otherwise
  * map the memory space.
  *
  * Must be called with ops_mutex held.
  */
-static void __iomem *
-set_cis_map(struct pcmcia_socket *s, unsigned int card_offset, unsigned int flags)
+static void __iomem *set_cis_map(struct pcmcia_socket *s,
+				unsigned int card_offset, unsigned int flags)
 {
 	pccard_mem_map *mem = &s->cis_mem;
 	int ret;
 
 	if (!(s->features & SS_CAP_STATIC_MAP) && (mem->res == NULL)) {
-		mem->res = pcmcia_find_mem_region(0, s->map_size, s->map_size, 0, s);
+		mem->res = pcmcia_find_mem_region(0, s->map_size,
+						s->map_size, 0, s);
 		if (mem->res == NULL) {
 			dev_printk(KERN_NOTICE, &s->dev,
 				   "cs: unable to map card memory!\n");
@@ -124,165 +122,170 @@
 	return s->cis_virt;
 }
 
-/*======================================================================
-
-    Low-level functions to read and write CIS memory.  I think the
-    write routine is only useful for writing one-byte registers.
-
-======================================================================*/
 
 /* Bits in attr field */
 #define IS_ATTR		1
 #define IS_INDIRECT	8
 
+/**
+ * pcmcia_read_cis_mem() - low-level function to read CIS memory
+ */
 int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 		 u_int len, void *ptr)
 {
-    void __iomem *sys, *end;
-    unsigned char *buf = ptr;
+	void __iomem *sys, *end;
+	unsigned char *buf = ptr;
 
-    dev_dbg(&s->dev, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len);
+	dev_dbg(&s->dev, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len);
 
-    mutex_lock(&s->ops_mutex);
-    if (attr & IS_INDIRECT) {
-	/* Indirect accesses use a bunch of special registers at fixed
-	   locations in common memory */
-	u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
-	if (attr & IS_ATTR) {
-	    addr *= 2;
-	    flags = ICTRL0_AUTOINC;
+	mutex_lock(&s->ops_mutex);
+	if (attr & IS_INDIRECT) {
+		/* Indirect accesses use a bunch of special registers at fixed
+		   locations in common memory */
+		u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
+		if (attr & IS_ATTR) {
+			addr *= 2;
+			flags = ICTRL0_AUTOINC;
+		}
+
+		sys = set_cis_map(s, 0, MAP_ACTIVE |
+				((cis_width) ? MAP_16BIT : 0));
+		if (!sys) {
+			dev_dbg(&s->dev, "could not map memory\n");
+			memset(ptr, 0xff, len);
+			mutex_unlock(&s->ops_mutex);
+			return -1;
+		}
+
+		writeb(flags, sys+CISREG_ICTRL0);
+		writeb(addr & 0xff, sys+CISREG_IADDR0);
+		writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
+		writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
+		writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
+		for ( ; len > 0; len--, buf++)
+			*buf = readb(sys+CISREG_IDATA0);
+	} else {
+		u_int inc = 1, card_offset, flags;
+
+		if (addr > CISTPL_MAX_CIS_SIZE)
+			dev_dbg(&s->dev,
+				"attempt to read CIS mem at addr %#x", addr);
+
+		flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
+		if (attr) {
+			flags |= MAP_ATTRIB;
+			inc++;
+			addr *= 2;
+		}
+
+		card_offset = addr & ~(s->map_size-1);
+		while (len) {
+			sys = set_cis_map(s, card_offset, flags);
+			if (!sys) {
+				dev_dbg(&s->dev, "could not map memory\n");
+				memset(ptr, 0xff, len);
+				mutex_unlock(&s->ops_mutex);
+				return -1;
+			}
+			end = sys + s->map_size;
+			sys = sys + (addr & (s->map_size-1));
+			for ( ; len > 0; len--, buf++, sys += inc) {
+				if (sys == end)
+					break;
+				*buf = readb(sys);
+			}
+			card_offset += s->map_size;
+			addr = 0;
+		}
 	}
-
-	sys = set_cis_map(s, 0, MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0));
-	if (!sys) {
-	    dev_dbg(&s->dev, "could not map memory\n");
-	    memset(ptr, 0xff, len);
-	    mutex_unlock(&s->ops_mutex);
-	    return -1;
-	}
-
-	writeb(flags, sys+CISREG_ICTRL0);
-	writeb(addr & 0xff, sys+CISREG_IADDR0);
-	writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
-	writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
-	writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
-	for ( ; len > 0; len--, buf++)
-	    *buf = readb(sys+CISREG_IDATA0);
-    } else {
-	u_int inc = 1, card_offset, flags;
-
-	if (addr > CISTPL_MAX_CIS_SIZE)
-		dev_dbg(&s->dev, "attempt to read CIS mem at addr %#x", addr);
-
-	flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
-	if (attr) {
-	    flags |= MAP_ATTRIB;
-	    inc++;
-	    addr *= 2;
-	}
-
-	card_offset = addr & ~(s->map_size-1);
-	while (len) {
-	    sys = set_cis_map(s, card_offset, flags);
-	    if (!sys) {
-		dev_dbg(&s->dev, "could not map memory\n");
-		memset(ptr, 0xff, len);
-		mutex_unlock(&s->ops_mutex);
-		return -1;
-	    }
-	    end = sys + s->map_size;
-	    sys = sys + (addr & (s->map_size-1));
-	    for ( ; len > 0; len--, buf++, sys += inc) {
-		if (sys == end)
-		    break;
-		*buf = readb(sys);
-	    }
-	    card_offset += s->map_size;
-	    addr = 0;
-	}
-    }
-    mutex_unlock(&s->ops_mutex);
-    dev_dbg(&s->dev, "  %#2.2x %#2.2x %#2.2x %#2.2x ...\n",
-	  *(u_char *)(ptr+0), *(u_char *)(ptr+1),
-	  *(u_char *)(ptr+2), *(u_char *)(ptr+3));
-    return 0;
+	mutex_unlock(&s->ops_mutex);
+	dev_dbg(&s->dev, "  %#2.2x %#2.2x %#2.2x %#2.2x ...\n",
+		*(u_char *)(ptr+0), *(u_char *)(ptr+1),
+		*(u_char *)(ptr+2), *(u_char *)(ptr+3));
+	return 0;
 }
 
 
+/**
+ * pcmcia_write_cis_mem() - low-level function to write CIS memory
+ *
+ * Probably only useful for writing one-byte registers.
+ */
 void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr,
 		   u_int len, void *ptr)
 {
-    void __iomem *sys, *end;
-    unsigned char *buf = ptr;
+	void __iomem *sys, *end;
+	unsigned char *buf = ptr;
 
-    dev_dbg(&s->dev, "pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len);
+	dev_dbg(&s->dev,
+		"pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len);
 
-    mutex_lock(&s->ops_mutex);
-    if (attr & IS_INDIRECT) {
-	/* Indirect accesses use a bunch of special registers at fixed
-	   locations in common memory */
-	u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
-	if (attr & IS_ATTR) {
-	    addr *= 2;
-	    flags = ICTRL0_AUTOINC;
+	mutex_lock(&s->ops_mutex);
+	if (attr & IS_INDIRECT) {
+		/* Indirect accesses use a bunch of special registers at fixed
+		   locations in common memory */
+		u_char flags = ICTRL0_COMMON|ICTRL0_AUTOINC|ICTRL0_BYTEGRAN;
+		if (attr & IS_ATTR) {
+			addr *= 2;
+			flags = ICTRL0_AUTOINC;
+		}
+
+		sys = set_cis_map(s, 0, MAP_ACTIVE |
+				((cis_width) ? MAP_16BIT : 0));
+		if (!sys) {
+			dev_dbg(&s->dev, "could not map memory\n");
+			mutex_unlock(&s->ops_mutex);
+			return; /* FIXME: Error */
+		}
+
+		writeb(flags, sys+CISREG_ICTRL0);
+		writeb(addr & 0xff, sys+CISREG_IADDR0);
+		writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
+		writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
+		writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
+		for ( ; len > 0; len--, buf++)
+			writeb(*buf, sys+CISREG_IDATA0);
+	} else {
+		u_int inc = 1, card_offset, flags;
+
+		flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
+		if (attr & IS_ATTR) {
+			flags |= MAP_ATTRIB;
+			inc++;
+			addr *= 2;
+		}
+
+		card_offset = addr & ~(s->map_size-1);
+		while (len) {
+			sys = set_cis_map(s, card_offset, flags);
+			if (!sys) {
+				dev_dbg(&s->dev, "could not map memory\n");
+				mutex_unlock(&s->ops_mutex);
+				return; /* FIXME: error */
+			}
+
+			end = sys + s->map_size;
+			sys = sys + (addr & (s->map_size-1));
+			for ( ; len > 0; len--, buf++, sys += inc) {
+				if (sys == end)
+					break;
+				writeb(*buf, sys);
+			}
+			card_offset += s->map_size;
+			addr = 0;
+		}
 	}
-
-	sys = set_cis_map(s, 0, MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0));
-	if (!sys) {
-		dev_dbg(&s->dev, "could not map memory\n");
-		mutex_unlock(&s->ops_mutex);
-		return; /* FIXME: Error */
-	}
-
-	writeb(flags, sys+CISREG_ICTRL0);
-	writeb(addr & 0xff, sys+CISREG_IADDR0);
-	writeb((addr>>8) & 0xff, sys+CISREG_IADDR1);
-	writeb((addr>>16) & 0xff, sys+CISREG_IADDR2);
-	writeb((addr>>24) & 0xff, sys+CISREG_IADDR3);
-	for ( ; len > 0; len--, buf++)
-	    writeb(*buf, sys+CISREG_IDATA0);
-    } else {
-	u_int inc = 1, card_offset, flags;
-
-	flags = MAP_ACTIVE | ((cis_width) ? MAP_16BIT : 0);
-	if (attr & IS_ATTR) {
-	    flags |= MAP_ATTRIB;
-	    inc++;
-	    addr *= 2;
-	}
-
-	card_offset = addr & ~(s->map_size-1);
-	while (len) {
-	    sys = set_cis_map(s, card_offset, flags);
-	    if (!sys) {
-		dev_dbg(&s->dev, "could not map memory\n");
-		mutex_unlock(&s->ops_mutex);
-		return; /* FIXME: error */
-	    }
-
-	    end = sys + s->map_size;
-	    sys = sys + (addr & (s->map_size-1));
-	    for ( ; len > 0; len--, buf++, sys += inc) {
-		if (sys == end)
-		    break;
-		writeb(*buf, sys);
-	    }
-	    card_offset += s->map_size;
-	    addr = 0;
-	}
-    }
-    mutex_unlock(&s->ops_mutex);
+	mutex_unlock(&s->ops_mutex);
 }
 
 
-/*======================================================================
-
-    This is a wrapper around read_cis_mem, with the same interface,
-    but which caches information, for cards whose CIS may not be
-    readable all the time.
-
-======================================================================*/
-
+/**
+ * read_cis_cache() - read CIS memory or its associated cache
+ *
+ * This is a wrapper around read_cis_mem, with the same interface,
+ * but which caches information, for cards whose CIS may not be
+ * readable all the time.
+ */
 static int read_cis_cache(struct pcmcia_socket *s, int attr, u_int addr,
 			size_t len, void *ptr)
 {
@@ -353,7 +356,6 @@
  * This destroys the CIS cache but keeps any fake CIS alive. Must be
  * called with ops_mutex held.
  */
-
 void destroy_cis_cache(struct pcmcia_socket *s)
 {
 	struct list_head *l, *n;
@@ -366,13 +368,9 @@
 	}
 }
 
-/*======================================================================
-
-    This verifies if the CIS of a card matches what is in the CIS
-    cache.
-
-======================================================================*/
-
+/**
+ * verify_cis_cache() - does the CIS match what is in the CIS cache?
+ */
 int verify_cis_cache(struct pcmcia_socket *s)
 {
 	struct cis_cache_entry *cis;
@@ -404,13 +402,12 @@
 	return 0;
 }
 
-/*======================================================================
-
-    For really bad cards, we provide a facility for uploading a
-    replacement CIS.
-
-======================================================================*/
-
+/**
+ * pcmcia_replace_cis() - use a replacement CIS instead of the card's CIS
+ *
+ * For really bad cards, we provide a facility for uploading a
+ * replacement CIS.
+ */
 int pcmcia_replace_cis(struct pcmcia_socket *s,
 		       const u8 *data, const size_t len)
 {
@@ -433,17 +430,13 @@
 	return 0;
 }
 
-/*======================================================================
-
-    The high-level CIS tuple services
-
-======================================================================*/
+/* The high-level CIS tuple services */
 
 typedef struct tuple_flags {
-    u_int		link_space:4;
-    u_int		has_link:1;
-    u_int		mfc_fn:3;
-    u_int		space:4;
+	u_int		link_space:4;
+	u_int		has_link:1;
+	u_int		mfc_fn:3;
+	u_int		space:4;
 } tuple_flags;
 
 #define LINK_SPACE(f)	(((tuple_flags *)(&(f)))->link_space)
@@ -451,982 +444,961 @@
 #define MFC_FN(f)	(((tuple_flags *)(&(f)))->mfc_fn)
 #define SPACE(f)	(((tuple_flags *)(&(f)))->space)
 
-int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, tuple_t *tuple)
+int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple)
 {
-    if (!s)
-	return -EINVAL;
+	if (!s)
+		return -EINVAL;
 
-    if (!(s->state & SOCKET_PRESENT) || (s->state & SOCKET_CARDBUS))
-	return -ENODEV;
-    tuple->TupleLink = tuple->Flags = 0;
+	if (!(s->state & SOCKET_PRESENT) || (s->state & SOCKET_CARDBUS))
+		return -ENODEV;
+	tuple->TupleLink = tuple->Flags = 0;
 
-    /* Assume presence of a LONGLINK_C to address 0 */
-    tuple->CISOffset = tuple->LinkOffset = 0;
-    SPACE(tuple->Flags) = HAS_LINK(tuple->Flags) = 1;
+	/* Assume presence of a LONGLINK_C to address 0 */
+	tuple->CISOffset = tuple->LinkOffset = 0;
+	SPACE(tuple->Flags) = HAS_LINK(tuple->Flags) = 1;
 
-    if ((s->functions > 1) && !(tuple->Attributes & TUPLE_RETURN_COMMON)) {
-	cisdata_t req = tuple->DesiredTuple;
-	tuple->DesiredTuple = CISTPL_LONGLINK_MFC;
-	if (pccard_get_next_tuple(s, function, tuple) == 0) {
-	    tuple->DesiredTuple = CISTPL_LINKTARGET;
-	    if (pccard_get_next_tuple(s, function, tuple) != 0)
-		return -ENOSPC;
-	} else
-	    tuple->CISOffset = tuple->TupleLink = 0;
-	tuple->DesiredTuple = req;
-    }
-    return pccard_get_next_tuple(s, function, tuple);
+	if ((s->functions > 1) && !(tuple->Attributes & TUPLE_RETURN_COMMON)) {
+		cisdata_t req = tuple->DesiredTuple;
+		tuple->DesiredTuple = CISTPL_LONGLINK_MFC;
+		if (pccard_get_next_tuple(s, function, tuple) == 0) {
+			tuple->DesiredTuple = CISTPL_LINKTARGET;
+			if (pccard_get_next_tuple(s, function, tuple) != 0)
+				return -ENOSPC;
+		} else
+			tuple->CISOffset = tuple->TupleLink = 0;
+		tuple->DesiredTuple = req;
+	}
+	return pccard_get_next_tuple(s, function, tuple);
 }
 
 static int follow_link(struct pcmcia_socket *s, tuple_t *tuple)
 {
-    u_char link[5];
-    u_int ofs;
-    int ret;
+	u_char link[5];
+	u_int ofs;
+	int ret;
 
-    if (MFC_FN(tuple->Flags)) {
-	/* Get indirect link from the MFC tuple */
-	ret = read_cis_cache(s, LINK_SPACE(tuple->Flags),
-		       tuple->LinkOffset, 5, link);
-	if (ret)
+	if (MFC_FN(tuple->Flags)) {
+		/* Get indirect link from the MFC tuple */
+		ret = read_cis_cache(s, LINK_SPACE(tuple->Flags),
+				tuple->LinkOffset, 5, link);
+		if (ret)
+			return -1;
+		ofs = get_unaligned_le32(link + 1);
+		SPACE(tuple->Flags) = (link[0] == CISTPL_MFC_ATTR);
+		/* Move to the next indirect link */
+		tuple->LinkOffset += 5;
+		MFC_FN(tuple->Flags)--;
+	} else if (HAS_LINK(tuple->Flags)) {
+		ofs = tuple->LinkOffset;
+		SPACE(tuple->Flags) = LINK_SPACE(tuple->Flags);
+		HAS_LINK(tuple->Flags) = 0;
+	} else
 		return -1;
-	ofs = get_unaligned_le32(link + 1);
-	SPACE(tuple->Flags) = (link[0] == CISTPL_MFC_ATTR);
-	/* Move to the next indirect link */
-	tuple->LinkOffset += 5;
-	MFC_FN(tuple->Flags)--;
-    } else if (HAS_LINK(tuple->Flags)) {
-	ofs = tuple->LinkOffset;
-	SPACE(tuple->Flags) = LINK_SPACE(tuple->Flags);
-	HAS_LINK(tuple->Flags) = 0;
-    } else {
-	return -1;
-    }
-    if (SPACE(tuple->Flags)) {
-	/* This is ugly, but a common CIS error is to code the long
-	   link offset incorrectly, so we check the right spot... */
+
+	if (SPACE(tuple->Flags)) {
+		/* This is ugly, but a common CIS error is to code the long
+		   link offset incorrectly, so we check the right spot... */
+		ret = read_cis_cache(s, SPACE(tuple->Flags), ofs, 5, link);
+		if (ret)
+			return -1;
+		if ((link[0] == CISTPL_LINKTARGET) && (link[1] >= 3) &&
+			(strncmp(link+2, "CIS", 3) == 0))
+			return ofs;
+		remove_cis_cache(s, SPACE(tuple->Flags), ofs, 5);
+		/* Then, we try the wrong spot... */
+		ofs = ofs >> 1;
+	}
 	ret = read_cis_cache(s, SPACE(tuple->Flags), ofs, 5, link);
 	if (ret)
 		return -1;
 	if ((link[0] == CISTPL_LINKTARGET) && (link[1] >= 3) &&
-	    (strncmp(link+2, "CIS", 3) == 0))
-	    return ofs;
+		(strncmp(link+2, "CIS", 3) == 0))
+		return ofs;
 	remove_cis_cache(s, SPACE(tuple->Flags), ofs, 5);
-	/* Then, we try the wrong spot... */
-	ofs = ofs >> 1;
-    }
-    ret = read_cis_cache(s, SPACE(tuple->Flags), ofs, 5, link);
-    if (ret)
-	    return -1;
-    if ((link[0] == CISTPL_LINKTARGET) && (link[1] >= 3) &&
-	(strncmp(link+2, "CIS", 3) == 0))
-	return ofs;
-    remove_cis_cache(s, SPACE(tuple->Flags), ofs, 5);
-    return -1;
+	return -1;
 }
 
-int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, tuple_t *tuple)
+int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function,
+			tuple_t *tuple)
 {
-    u_char link[2], tmp;
-    int ofs, i, attr;
-    int ret;
+	u_char link[2], tmp;
+	int ofs, i, attr;
+	int ret;
 
-    if (!s)
-	return -EINVAL;
-    if (!(s->state & SOCKET_PRESENT) || (s->state & SOCKET_CARDBUS))
-	return -ENODEV;
+	if (!s)
+		return -EINVAL;
+	if (!(s->state & SOCKET_PRESENT) || (s->state & SOCKET_CARDBUS))
+		return -ENODEV;
 
-    link[1] = tuple->TupleLink;
-    ofs = tuple->CISOffset + tuple->TupleLink;
-    attr = SPACE(tuple->Flags);
+	link[1] = tuple->TupleLink;
+	ofs = tuple->CISOffset + tuple->TupleLink;
+	attr = SPACE(tuple->Flags);
 
-    for (i = 0; i < MAX_TUPLES; i++) {
-	if (link[1] == 0xff) {
-	    link[0] = CISTPL_END;
-	} else {
-	    ret = read_cis_cache(s, attr, ofs, 2, link);
-	    if (ret)
-		    return -1;
-	    if (link[0] == CISTPL_NULL) {
-		ofs++; continue;
-	    }
-	}
-
-	/* End of chain?  Follow long link if possible */
-	if (link[0] == CISTPL_END) {
-	    ofs = follow_link(s, tuple);
-	    if (ofs < 0)
-		return -ENOSPC;
-	    attr = SPACE(tuple->Flags);
-	    ret = read_cis_cache(s, attr, ofs, 2, link);
-	    if (ret)
-		    return -1;
-	}
-
-	/* Is this a link tuple?  Make a note of it */
-	if ((link[0] == CISTPL_LONGLINK_A) ||
-	    (link[0] == CISTPL_LONGLINK_C) ||
-	    (link[0] == CISTPL_LONGLINK_MFC) ||
-	    (link[0] == CISTPL_LINKTARGET) ||
-	    (link[0] == CISTPL_INDIRECT) ||
-	    (link[0] == CISTPL_NO_LINK)) {
-	    switch (link[0]) {
-	    case CISTPL_LONGLINK_A:
-		HAS_LINK(tuple->Flags) = 1;
-		LINK_SPACE(tuple->Flags) = attr | IS_ATTR;
-		ret = read_cis_cache(s, attr, ofs+2, 4, &tuple->LinkOffset);
-		if (ret)
-			return -1;
-		break;
-	    case CISTPL_LONGLINK_C:
-		HAS_LINK(tuple->Flags) = 1;
-		LINK_SPACE(tuple->Flags) = attr & ~IS_ATTR;
-		ret = read_cis_cache(s, attr, ofs+2, 4, &tuple->LinkOffset);
-		if (ret)
-			return -1;
-		break;
-	    case CISTPL_INDIRECT:
-		HAS_LINK(tuple->Flags) = 1;
-		LINK_SPACE(tuple->Flags) = IS_ATTR | IS_INDIRECT;
-		tuple->LinkOffset = 0;
-		break;
-	    case CISTPL_LONGLINK_MFC:
-		tuple->LinkOffset = ofs + 3;
-		LINK_SPACE(tuple->Flags) = attr;
-		if (function == BIND_FN_ALL) {
-		    /* Follow all the MFC links */
-		    ret = read_cis_cache(s, attr, ofs+2, 1, &tmp);
-		    if (ret)
-			    return -1;
-		    MFC_FN(tuple->Flags) = tmp;
-		} else {
-		    /* Follow exactly one of the links */
-		    MFC_FN(tuple->Flags) = 1;
-		    tuple->LinkOffset += function * 5;
+	for (i = 0; i < MAX_TUPLES; i++) {
+		if (link[1] == 0xff)
+			link[0] = CISTPL_END;
+		else {
+			ret = read_cis_cache(s, attr, ofs, 2, link);
+			if (ret)
+				return -1;
+			if (link[0] == CISTPL_NULL) {
+				ofs++;
+				continue;
+			}
 		}
-		break;
-	    case CISTPL_NO_LINK:
-		HAS_LINK(tuple->Flags) = 0;
-		break;
-	    }
-	    if ((tuple->Attributes & TUPLE_RETURN_LINK) &&
-		(tuple->DesiredTuple == RETURN_FIRST_TUPLE))
-		break;
-	} else
-	    if (tuple->DesiredTuple == RETURN_FIRST_TUPLE)
-		break;
 
-	if (link[0] == tuple->DesiredTuple)
-	    break;
-	ofs += link[1] + 2;
-    }
-    if (i == MAX_TUPLES) {
-	dev_dbg(&s->dev, "cs: overrun in pcmcia_get_next_tuple\n");
-	return -ENOSPC;
-    }
+		/* End of chain?  Follow long link if possible */
+		if (link[0] == CISTPL_END) {
+			ofs = follow_link(s, tuple);
+			if (ofs < 0)
+				return -ENOSPC;
+			attr = SPACE(tuple->Flags);
+			ret = read_cis_cache(s, attr, ofs, 2, link);
+			if (ret)
+				return -1;
+		}
 
-    tuple->TupleCode = link[0];
-    tuple->TupleLink = link[1];
-    tuple->CISOffset = ofs + 2;
-    return 0;
+		/* Is this a link tuple?  Make a note of it */
+		if ((link[0] == CISTPL_LONGLINK_A) ||
+			(link[0] == CISTPL_LONGLINK_C) ||
+			(link[0] == CISTPL_LONGLINK_MFC) ||
+			(link[0] == CISTPL_LINKTARGET) ||
+			(link[0] == CISTPL_INDIRECT) ||
+			(link[0] == CISTPL_NO_LINK)) {
+			switch (link[0]) {
+			case CISTPL_LONGLINK_A:
+				HAS_LINK(tuple->Flags) = 1;
+				LINK_SPACE(tuple->Flags) = attr | IS_ATTR;
+				ret = read_cis_cache(s, attr, ofs+2, 4,
+						&tuple->LinkOffset);
+				if (ret)
+					return -1;
+				break;
+			case CISTPL_LONGLINK_C:
+				HAS_LINK(tuple->Flags) = 1;
+				LINK_SPACE(tuple->Flags) = attr & ~IS_ATTR;
+				ret = read_cis_cache(s, attr, ofs+2, 4,
+						&tuple->LinkOffset);
+				if (ret)
+					return -1;
+				break;
+			case CISTPL_INDIRECT:
+				HAS_LINK(tuple->Flags) = 1;
+				LINK_SPACE(tuple->Flags) = IS_ATTR |
+					IS_INDIRECT;
+				tuple->LinkOffset = 0;
+				break;
+			case CISTPL_LONGLINK_MFC:
+				tuple->LinkOffset = ofs + 3;
+				LINK_SPACE(tuple->Flags) = attr;
+				if (function == BIND_FN_ALL) {
+					/* Follow all the MFC links */
+					ret = read_cis_cache(s, attr, ofs+2,
+							1, &tmp);
+					if (ret)
+						return -1;
+					MFC_FN(tuple->Flags) = tmp;
+				} else {
+					/* Follow exactly one of the links */
+					MFC_FN(tuple->Flags) = 1;
+					tuple->LinkOffset += function * 5;
+				}
+				break;
+			case CISTPL_NO_LINK:
+				HAS_LINK(tuple->Flags) = 0;
+				break;
+			}
+			if ((tuple->Attributes & TUPLE_RETURN_LINK) &&
+				(tuple->DesiredTuple == RETURN_FIRST_TUPLE))
+				break;
+		} else
+			if (tuple->DesiredTuple == RETURN_FIRST_TUPLE)
+				break;
+
+		if (link[0] == tuple->DesiredTuple)
+			break;
+		ofs += link[1] + 2;
+	}
+	if (i == MAX_TUPLES) {
+		dev_dbg(&s->dev, "cs: overrun in pcmcia_get_next_tuple\n");
+		return -ENOSPC;
+	}
+
+	tuple->TupleCode = link[0];
+	tuple->TupleLink = link[1];
+	tuple->CISOffset = ofs + 2;
+	return 0;
 }
 
-/*====================================================================*/
-
-#define _MIN(a, b)		(((a) < (b)) ? (a) : (b))
-
 int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple)
 {
-    u_int len;
-    int ret;
+	u_int len;
+	int ret;
 
-    if (!s)
-	return -EINVAL;
+	if (!s)
+		return -EINVAL;
 
-    if (tuple->TupleLink < tuple->TupleOffset)
-	return -ENOSPC;
-    len = tuple->TupleLink - tuple->TupleOffset;
-    tuple->TupleDataLen = tuple->TupleLink;
-    if (len == 0)
+	if (tuple->TupleLink < tuple->TupleOffset)
+		return -ENOSPC;
+	len = tuple->TupleLink - tuple->TupleOffset;
+	tuple->TupleDataLen = tuple->TupleLink;
+	if (len == 0)
+		return 0;
+	ret = read_cis_cache(s, SPACE(tuple->Flags),
+			tuple->CISOffset + tuple->TupleOffset,
+			min(len, (u_int) tuple->TupleDataMax),
+			tuple->TupleData);
+	if (ret)
+		return -1;
 	return 0;
-    ret = read_cis_cache(s, SPACE(tuple->Flags),
-		   tuple->CISOffset + tuple->TupleOffset,
-		   _MIN(len, tuple->TupleDataMax), tuple->TupleData);
-    if (ret)
-	    return -1;
-    return 0;
 }
 
 
-/*======================================================================
-
-    Parsing routines for individual tuples
-
-======================================================================*/
+/* Parsing routines for individual tuples */
 
 static int parse_device(tuple_t *tuple, cistpl_device_t *device)
 {
-    int i;
-    u_char scale;
-    u_char *p, *q;
+	int i;
+	u_char scale;
+	u_char *p, *q;
 
-    p = (u_char *)tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = (u_char *)tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    device->ndev = 0;
-    for (i = 0; i < CISTPL_MAX_DEVICES; i++) {
+	device->ndev = 0;
+	for (i = 0; i < CISTPL_MAX_DEVICES; i++) {
 
-	if (*p == 0xff)
-		break;
-	device->dev[i].type = (*p >> 4);
-	device->dev[i].wp = (*p & 0x08) ? 1 : 0;
-	switch (*p & 0x07) {
-	case 0:
-		device->dev[i].speed = 0;
-		break;
-	case 1:
-		device->dev[i].speed = 250;
-		break;
-	case 2:
-		device->dev[i].speed = 200;
-		break;
-	case 3:
-		device->dev[i].speed = 150;
-		break;
-	case 4:
-		device->dev[i].speed = 100;
-		break;
-	case 7:
-		if (++p == q)
-			return -EINVAL;
-		device->dev[i].speed = SPEED_CVT(*p);
-		while (*p & 0x80)
+		if (*p == 0xff)
+			break;
+		device->dev[i].type = (*p >> 4);
+		device->dev[i].wp = (*p & 0x08) ? 1 : 0;
+		switch (*p & 0x07) {
+		case 0:
+			device->dev[i].speed = 0;
+			break;
+		case 1:
+			device->dev[i].speed = 250;
+			break;
+		case 2:
+			device->dev[i].speed = 200;
+			break;
+		case 3:
+			device->dev[i].speed = 150;
+			break;
+		case 4:
+			device->dev[i].speed = 100;
+			break;
+		case 7:
 			if (++p == q)
 				return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
+			device->dev[i].speed = SPEED_CVT(*p);
+			while (*p & 0x80)
+				if (++p == q)
+					return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (++p == q)
+			return -EINVAL;
+		if (*p == 0xff)
+			break;
+		scale = *p & 7;
+		if (scale == 7)
+			return -EINVAL;
+		device->dev[i].size = ((*p >> 3) + 1) * (512 << (scale*2));
+		device->ndev++;
+		if (++p == q)
+			break;
 	}
 
-	if (++p == q)
-		return -EINVAL;
-	if (*p == 0xff)
-		break;
-	scale = *p & 7;
-	if (scale == 7)
-		return -EINVAL;
-	device->dev[i].size = ((*p >> 3) + 1) * (512 << (scale*2));
-	device->ndev++;
-	if (++p == q)
-		break;
-    }
-
-    return 0;
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_checksum(tuple_t *tuple, cistpl_checksum_t *csum)
 {
-    u_char *p;
-    if (tuple->TupleDataLen < 5)
-	return -EINVAL;
-    p = (u_char *) tuple->TupleData;
-    csum->addr = tuple->CISOffset + get_unaligned_le16(p) - 2;
-    csum->len = get_unaligned_le16(p + 2);
-    csum->sum = *(p + 4);
-    return 0;
+	u_char *p;
+	if (tuple->TupleDataLen < 5)
+		return -EINVAL;
+	p = (u_char *) tuple->TupleData;
+	csum->addr = tuple->CISOffset + get_unaligned_le16(p) - 2;
+	csum->len = get_unaligned_le16(p + 2);
+	csum->sum = *(p + 4);
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_longlink(tuple_t *tuple, cistpl_longlink_t *link)
 {
-    if (tuple->TupleDataLen < 4)
-	return -EINVAL;
-    link->addr = get_unaligned_le32(tuple->TupleData);
-    return 0;
+	if (tuple->TupleDataLen < 4)
+		return -EINVAL;
+	link->addr = get_unaligned_le32(tuple->TupleData);
+	return 0;
 }
 
-/*====================================================================*/
 
-static int parse_longlink_mfc(tuple_t *tuple,
-			      cistpl_longlink_mfc_t *link)
+static int parse_longlink_mfc(tuple_t *tuple, cistpl_longlink_mfc_t *link)
 {
-    u_char *p;
-    int i;
+	u_char *p;
+	int i;
 
-    p = (u_char *)tuple->TupleData;
+	p = (u_char *)tuple->TupleData;
 
-    link->nfn = *p; p++;
-    if (tuple->TupleDataLen <= link->nfn*5)
-	return -EINVAL;
-    for (i = 0; i < link->nfn; i++) {
-	link->fn[i].space = *p; p++;
-	link->fn[i].addr = get_unaligned_le32(p);
-	p += 4;
-    }
-    return 0;
+	link->nfn = *p; p++;
+	if (tuple->TupleDataLen <= link->nfn*5)
+		return -EINVAL;
+	for (i = 0; i < link->nfn; i++) {
+		link->fn[i].space = *p; p++;
+		link->fn[i].addr = get_unaligned_le32(p);
+		p += 4;
+	}
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_strings(u_char *p, u_char *q, int max,
 			 char *s, u_char *ofs, u_char *found)
 {
-    int i, j, ns;
+	int i, j, ns;
 
-    if (p == q)
-	    return -EINVAL;
-    ns = 0; j = 0;
-    for (i = 0; i < max; i++) {
-	if (*p == 0xff)
-		break;
-	ofs[i] = j;
-	ns++;
-	for (;;) {
-	    s[j++] = (*p == 0xff) ? '\0' : *p;
-	    if ((*p == '\0') || (*p == 0xff))
-		    break;
-	    if (++p == q)
-		    return -EINVAL;
+	if (p == q)
+		return -EINVAL;
+	ns = 0; j = 0;
+	for (i = 0; i < max; i++) {
+		if (*p == 0xff)
+			break;
+		ofs[i] = j;
+		ns++;
+		for (;;) {
+			s[j++] = (*p == 0xff) ? '\0' : *p;
+			if ((*p == '\0') || (*p == 0xff))
+				break;
+			if (++p == q)
+				return -EINVAL;
+		}
+		if ((*p == 0xff) || (++p == q))
+			break;
 	}
-	if ((*p == 0xff) || (++p == q))
-		break;
-    }
-    if (found) {
-	*found = ns;
-	return 0;
-    } else {
+	if (found) {
+		*found = ns;
+		return 0;
+	}
+
 	return (ns == max) ? 0 : -EINVAL;
-    }
 }
 
-/*====================================================================*/
 
 static int parse_vers_1(tuple_t *tuple, cistpl_vers_1_t *vers_1)
 {
-    u_char *p, *q;
+	u_char *p, *q;
 
-    p = (u_char *)tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = (u_char *)tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    vers_1->major = *p; p++;
-    vers_1->minor = *p; p++;
-    if (p >= q)
-	    return -EINVAL;
+	vers_1->major = *p; p++;
+	vers_1->minor = *p; p++;
+	if (p >= q)
+		return -EINVAL;
 
-    return parse_strings(p, q, CISTPL_VERS_1_MAX_PROD_STRINGS,
-			 vers_1->str, vers_1->ofs, &vers_1->ns);
+	return parse_strings(p, q, CISTPL_VERS_1_MAX_PROD_STRINGS,
+			vers_1->str, vers_1->ofs, &vers_1->ns);
 }
 
-/*====================================================================*/
 
 static int parse_altstr(tuple_t *tuple, cistpl_altstr_t *altstr)
 {
-    u_char *p, *q;
+	u_char *p, *q;
 
-    p = (u_char *)tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = (u_char *)tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    return parse_strings(p, q, CISTPL_MAX_ALTSTR_STRINGS,
-			 altstr->str, altstr->ofs, &altstr->ns);
+	return parse_strings(p, q, CISTPL_MAX_ALTSTR_STRINGS,
+			altstr->str, altstr->ofs, &altstr->ns);
 }
 
-/*====================================================================*/
 
 static int parse_jedec(tuple_t *tuple, cistpl_jedec_t *jedec)
 {
-    u_char *p, *q;
-    int nid;
+	u_char *p, *q;
+	int nid;
 
-    p = (u_char *)tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = (u_char *)tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    for (nid = 0; nid < CISTPL_MAX_DEVICES; nid++) {
-	if (p > q-2)
-		break;
-	jedec->id[nid].mfr = p[0];
-	jedec->id[nid].info = p[1];
-	p += 2;
-    }
-    jedec->nid = nid;
-    return 0;
+	for (nid = 0; nid < CISTPL_MAX_DEVICES; nid++) {
+		if (p > q-2)
+			break;
+		jedec->id[nid].mfr = p[0];
+		jedec->id[nid].info = p[1];
+		p += 2;
+	}
+	jedec->nid = nid;
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_manfid(tuple_t *tuple, cistpl_manfid_t *m)
 {
-    if (tuple->TupleDataLen < 4)
-	return -EINVAL;
-    m->manf = get_unaligned_le16(tuple->TupleData);
-    m->card = get_unaligned_le16(tuple->TupleData + 2);
-    return 0;
+	if (tuple->TupleDataLen < 4)
+		return -EINVAL;
+	m->manf = get_unaligned_le16(tuple->TupleData);
+	m->card = get_unaligned_le16(tuple->TupleData + 2);
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_funcid(tuple_t *tuple, cistpl_funcid_t *f)
 {
-    u_char *p;
-    if (tuple->TupleDataLen < 2)
-	return -EINVAL;
-    p = (u_char *)tuple->TupleData;
-    f->func = p[0];
-    f->sysinit = p[1];
-    return 0;
+	u_char *p;
+	if (tuple->TupleDataLen < 2)
+		return -EINVAL;
+	p = (u_char *)tuple->TupleData;
+	f->func = p[0];
+	f->sysinit = p[1];
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_funce(tuple_t *tuple, cistpl_funce_t *f)
 {
-    u_char *p;
-    int i;
-    if (tuple->TupleDataLen < 1)
-	return -EINVAL;
-    p = (u_char *)tuple->TupleData;
-    f->type = p[0];
-    for (i = 1; i < tuple->TupleDataLen; i++)
-	f->data[i-1] = p[i];
-    return 0;
+	u_char *p;
+	int i;
+	if (tuple->TupleDataLen < 1)
+		return -EINVAL;
+	p = (u_char *)tuple->TupleData;
+	f->type = p[0];
+	for (i = 1; i < tuple->TupleDataLen; i++)
+		f->data[i-1] = p[i];
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_config(tuple_t *tuple, cistpl_config_t *config)
 {
-    int rasz, rmsz, i;
-    u_char *p;
+	int rasz, rmsz, i;
+	u_char *p;
 
-    p = (u_char *)tuple->TupleData;
-    rasz = *p & 0x03;
-    rmsz = (*p & 0x3c) >> 2;
-    if (tuple->TupleDataLen < rasz+rmsz+4)
-	return -EINVAL;
-    config->last_idx = *(++p);
-    p++;
-    config->base = 0;
-    for (i = 0; i <= rasz; i++)
-	config->base += p[i] << (8*i);
-    p += rasz+1;
-    for (i = 0; i < 4; i++)
-	config->rmask[i] = 0;
-    for (i = 0; i <= rmsz; i++)
-	config->rmask[i>>2] += p[i] << (8*(i%4));
-    config->subtuples = tuple->TupleDataLen - (rasz+rmsz+4);
-    return 0;
+	p = (u_char *)tuple->TupleData;
+	rasz = *p & 0x03;
+	rmsz = (*p & 0x3c) >> 2;
+	if (tuple->TupleDataLen < rasz+rmsz+4)
+		return -EINVAL;
+	config->last_idx = *(++p);
+	p++;
+	config->base = 0;
+	for (i = 0; i <= rasz; i++)
+		config->base += p[i] << (8*i);
+	p += rasz+1;
+	for (i = 0; i < 4; i++)
+		config->rmask[i] = 0;
+	for (i = 0; i <= rmsz; i++)
+		config->rmask[i>>2] += p[i] << (8*(i%4));
+	config->subtuples = tuple->TupleDataLen - (rasz+rmsz+4);
+	return 0;
 }
 
-/*======================================================================
+/* The following routines are all used to parse the nightmarish
+ * config table entries.
+ */
 
-    The following routines are all used to parse the nightmarish
-    config table entries.
-
-======================================================================*/
-
-static u_char *parse_power(u_char *p, u_char *q,
-			   cistpl_power_t *pwr)
+static u_char *parse_power(u_char *p, u_char *q, cistpl_power_t *pwr)
 {
-    int i;
-    u_int scale;
+	int i;
+	u_int scale;
 
-    if (p == q)
-	    return NULL;
-    pwr->present = *p;
-    pwr->flags = 0;
-    p++;
-    for (i = 0; i < 7; i++)
-	if (pwr->present & (1<<i)) {
-	    if (p == q)
-		    return NULL;
-	    pwr->param[i] = POWER_CVT(*p);
-	    scale = POWER_SCALE(*p);
-	    while (*p & 0x80) {
+	if (p == q)
+		return NULL;
+	pwr->present = *p;
+	pwr->flags = 0;
+	p++;
+	for (i = 0; i < 7; i++)
+		if (pwr->present & (1<<i)) {
+			if (p == q)
+				return NULL;
+			pwr->param[i] = POWER_CVT(*p);
+			scale = POWER_SCALE(*p);
+			while (*p & 0x80) {
+				if (++p == q)
+					return NULL;
+				if ((*p & 0x7f) < 100)
+					pwr->param[i] +=
+						(*p & 0x7f) * scale / 100;
+				else if (*p == 0x7d)
+					pwr->flags |= CISTPL_POWER_HIGHZ_OK;
+				else if (*p == 0x7e)
+					pwr->param[i] = 0;
+				else if (*p == 0x7f)
+					pwr->flags |= CISTPL_POWER_HIGHZ_REQ;
+				else
+					return NULL;
+			}
+			p++;
+		}
+	return p;
+}
+
+
+static u_char *parse_timing(u_char *p, u_char *q, cistpl_timing_t *timing)
+{
+	u_char scale;
+
+	if (p == q)
+		return NULL;
+	scale = *p;
+	if ((scale & 3) != 3) {
 		if (++p == q)
 			return NULL;
-		if ((*p & 0x7f) < 100)
-		    pwr->param[i] += (*p & 0x7f) * scale / 100;
-		else if (*p == 0x7d)
-		    pwr->flags |= CISTPL_POWER_HIGHZ_OK;
-		else if (*p == 0x7e)
-		    pwr->param[i] = 0;
-		else if (*p == 0x7f)
-		    pwr->flags |= CISTPL_POWER_HIGHZ_REQ;
-		else
-		    return NULL;
-	    }
-	    p++;
-	}
-    return p;
+		timing->wait = SPEED_CVT(*p);
+		timing->waitscale = exponent[scale & 3];
+	} else
+		timing->wait = 0;
+	scale >>= 2;
+	if ((scale & 7) != 7) {
+		if (++p == q)
+			return NULL;
+		timing->ready = SPEED_CVT(*p);
+		timing->rdyscale = exponent[scale & 7];
+	} else
+		timing->ready = 0;
+	scale >>= 3;
+	if (scale != 7) {
+		if (++p == q)
+			return NULL;
+		timing->reserved = SPEED_CVT(*p);
+		timing->rsvscale = exponent[scale];
+	} else
+		timing->reserved = 0;
+	p++;
+	return p;
 }
 
-/*====================================================================*/
-
-static u_char *parse_timing(u_char *p, u_char *q,
-			    cistpl_timing_t *timing)
-{
-    u_char scale;
-
-    if (p == q)
-	    return NULL;
-    scale = *p;
-    if ((scale & 3) != 3) {
-	if (++p == q)
-		return NULL;
-	timing->wait = SPEED_CVT(*p);
-	timing->waitscale = exponent[scale & 3];
-    } else
-	timing->wait = 0;
-    scale >>= 2;
-    if ((scale & 7) != 7) {
-	if (++p == q)
-		return NULL;
-	timing->ready = SPEED_CVT(*p);
-	timing->rdyscale = exponent[scale & 7];
-    } else
-	timing->ready = 0;
-    scale >>= 3;
-    if (scale != 7) {
-	if (++p == q)
-		return NULL;
-	timing->reserved = SPEED_CVT(*p);
-	timing->rsvscale = exponent[scale];
-    } else
-	timing->reserved = 0;
-    p++;
-    return p;
-}
-
-/*====================================================================*/
 
 static u_char *parse_io(u_char *p, u_char *q, cistpl_io_t *io)
 {
-    int i, j, bsz, lsz;
+	int i, j, bsz, lsz;
 
-    if (p == q)
-	    return NULL;
-    io->flags = *p;
+	if (p == q)
+		return NULL;
+	io->flags = *p;
 
-    if (!(*p & 0x80)) {
-	io->nwin = 1;
-	io->win[0].base = 0;
-	io->win[0].len = (1 << (io->flags & CISTPL_IO_LINES_MASK));
-	return p+1;
-    }
-
-    if (++p == q)
-	    return NULL;
-    io->nwin = (*p & 0x0f) + 1;
-    bsz = (*p & 0x30) >> 4;
-    if (bsz == 3)
-	    bsz++;
-    lsz = (*p & 0xc0) >> 6;
-    if (lsz == 3)
-	    lsz++;
-    p++;
-
-    for (i = 0; i < io->nwin; i++) {
-	io->win[i].base = 0;
-	io->win[i].len = 1;
-	for (j = 0; j < bsz; j++, p++) {
-	    if (p == q)
-		    return NULL;
-	    io->win[i].base += *p << (j*8);
+	if (!(*p & 0x80)) {
+		io->nwin = 1;
+		io->win[0].base = 0;
+		io->win[0].len = (1 << (io->flags & CISTPL_IO_LINES_MASK));
+		return p+1;
 	}
-	for (j = 0; j < lsz; j++, p++) {
-	    if (p == q)
-		    return NULL;
-	    io->win[i].len += *p << (j*8);
+
+	if (++p == q)
+		return NULL;
+	io->nwin = (*p & 0x0f) + 1;
+	bsz = (*p & 0x30) >> 4;
+	if (bsz == 3)
+		bsz++;
+	lsz = (*p & 0xc0) >> 6;
+	if (lsz == 3)
+		lsz++;
+	p++;
+
+	for (i = 0; i < io->nwin; i++) {
+		io->win[i].base = 0;
+		io->win[i].len = 1;
+		for (j = 0; j < bsz; j++, p++) {
+			if (p == q)
+				return NULL;
+			io->win[i].base += *p << (j*8);
+		}
+		for (j = 0; j < lsz; j++, p++) {
+			if (p == q)
+				return NULL;
+			io->win[i].len += *p << (j*8);
+		}
 	}
-    }
-    return p;
+	return p;
 }
 
-/*====================================================================*/
 
 static u_char *parse_mem(u_char *p, u_char *q, cistpl_mem_t *mem)
 {
-    int i, j, asz, lsz, has_ha;
-    u_int len, ca, ha;
+	int i, j, asz, lsz, has_ha;
+	u_int len, ca, ha;
 
-    if (p == q)
-	    return NULL;
+	if (p == q)
+		return NULL;
 
-    mem->nwin = (*p & 0x07) + 1;
-    lsz = (*p & 0x18) >> 3;
-    asz = (*p & 0x60) >> 5;
-    has_ha = (*p & 0x80);
-    if (++p == q)
-	    return NULL;
+	mem->nwin = (*p & 0x07) + 1;
+	lsz = (*p & 0x18) >> 3;
+	asz = (*p & 0x60) >> 5;
+	has_ha = (*p & 0x80);
+	if (++p == q)
+		return NULL;
 
-    for (i = 0; i < mem->nwin; i++) {
-	len = ca = ha = 0;
-	for (j = 0; j < lsz; j++, p++) {
-	    if (p == q)
-		    return NULL;
-	    len += *p << (j*8);
+	for (i = 0; i < mem->nwin; i++) {
+		len = ca = ha = 0;
+		for (j = 0; j < lsz; j++, p++) {
+			if (p == q)
+				return NULL;
+			len += *p << (j*8);
+		}
+		for (j = 0; j < asz; j++, p++) {
+			if (p == q)
+				return NULL;
+			ca += *p << (j*8);
+		}
+		if (has_ha)
+			for (j = 0; j < asz; j++, p++) {
+				if (p == q)
+					return NULL;
+				ha += *p << (j*8);
+			}
+		mem->win[i].len = len << 8;
+		mem->win[i].card_addr = ca << 8;
+		mem->win[i].host_addr = ha << 8;
 	}
-	for (j = 0; j < asz; j++, p++) {
-	    if (p == q)
-		    return NULL;
-	    ca += *p << (j*8);
-	}
-	if (has_ha)
-	    for (j = 0; j < asz; j++, p++) {
-		if (p == q)
-			return NULL;
-		ha += *p << (j*8);
-	    }
-	mem->win[i].len = len << 8;
-	mem->win[i].card_addr = ca << 8;
-	mem->win[i].host_addr = ha << 8;
-    }
-    return p;
+	return p;
 }
 
-/*====================================================================*/
 
 static u_char *parse_irq(u_char *p, u_char *q, cistpl_irq_t *irq)
 {
-    if (p == q)
-	    return NULL;
-    irq->IRQInfo1 = *p; p++;
-    if (irq->IRQInfo1 & IRQ_INFO2_VALID) {
-	if (p+2 > q)
+	if (p == q)
 		return NULL;
-	irq->IRQInfo2 = (p[1]<<8) + p[0];
-	p += 2;
-    }
-    return p;
+	irq->IRQInfo1 = *p; p++;
+	if (irq->IRQInfo1 & IRQ_INFO2_VALID) {
+		if (p+2 > q)
+			return NULL;
+		irq->IRQInfo2 = (p[1]<<8) + p[0];
+		p += 2;
+	}
+	return p;
 }
 
-/*====================================================================*/
 
 static int parse_cftable_entry(tuple_t *tuple,
 			       cistpl_cftable_entry_t *entry)
 {
-    u_char *p, *q, features;
+	u_char *p, *q, features;
 
-    p = tuple->TupleData;
-    q = p + tuple->TupleDataLen;
-    entry->index = *p & 0x3f;
-    entry->flags = 0;
-    if (*p & 0x40)
-	entry->flags |= CISTPL_CFTABLE_DEFAULT;
-    if (*p & 0x80) {
+	p = tuple->TupleData;
+	q = p + tuple->TupleDataLen;
+	entry->index = *p & 0x3f;
+	entry->flags = 0;
+	if (*p & 0x40)
+		entry->flags |= CISTPL_CFTABLE_DEFAULT;
+	if (*p & 0x80) {
+		if (++p == q)
+			return -EINVAL;
+		if (*p & 0x10)
+			entry->flags |= CISTPL_CFTABLE_BVDS;
+		if (*p & 0x20)
+			entry->flags |= CISTPL_CFTABLE_WP;
+		if (*p & 0x40)
+			entry->flags |= CISTPL_CFTABLE_RDYBSY;
+		if (*p & 0x80)
+			entry->flags |= CISTPL_CFTABLE_MWAIT;
+		entry->interface = *p & 0x0f;
+	} else
+		entry->interface = 0;
+
+	/* Process optional features */
 	if (++p == q)
 		return -EINVAL;
-	if (*p & 0x10)
-	    entry->flags |= CISTPL_CFTABLE_BVDS;
-	if (*p & 0x20)
-	    entry->flags |= CISTPL_CFTABLE_WP;
-	if (*p & 0x40)
-	    entry->flags |= CISTPL_CFTABLE_RDYBSY;
-	if (*p & 0x80)
-	    entry->flags |= CISTPL_CFTABLE_MWAIT;
-	entry->interface = *p & 0x0f;
-    } else
-	entry->interface = 0;
+	features = *p; p++;
 
-    /* Process optional features */
-    if (++p == q)
-	    return -EINVAL;
-    features = *p; p++;
+	/* Power options */
+	if ((features & 3) > 0) {
+		p = parse_power(p, q, &entry->vcc);
+		if (p == NULL)
+			return -EINVAL;
+	} else
+		entry->vcc.present = 0;
+	if ((features & 3) > 1) {
+		p = parse_power(p, q, &entry->vpp1);
+		if (p == NULL)
+			return -EINVAL;
+	} else
+		entry->vpp1.present = 0;
+	if ((features & 3) > 2) {
+		p = parse_power(p, q, &entry->vpp2);
+		if (p == NULL)
+			return -EINVAL;
+	} else
+		entry->vpp2.present = 0;
 
-    /* Power options */
-    if ((features & 3) > 0) {
-	p = parse_power(p, q, &entry->vcc);
-	if (p == NULL)
-		return -EINVAL;
-    } else
-	entry->vcc.present = 0;
-    if ((features & 3) > 1) {
-	p = parse_power(p, q, &entry->vpp1);
-	if (p == NULL)
-		return -EINVAL;
-    } else
-	entry->vpp1.present = 0;
-    if ((features & 3) > 2) {
-	p = parse_power(p, q, &entry->vpp2);
-	if (p == NULL)
-		return -EINVAL;
-    } else
-	entry->vpp2.present = 0;
+	/* Timing options */
+	if (features & 0x04) {
+		p = parse_timing(p, q, &entry->timing);
+		if (p == NULL)
+			return -EINVAL;
+	} else {
+		entry->timing.wait = 0;
+		entry->timing.ready = 0;
+		entry->timing.reserved = 0;
+	}
 
-    /* Timing options */
-    if (features & 0x04) {
-	p = parse_timing(p, q, &entry->timing);
-	if (p == NULL)
-		return -EINVAL;
-    } else {
-	entry->timing.wait = 0;
-	entry->timing.ready = 0;
-	entry->timing.reserved = 0;
-    }
+	/* I/O window options */
+	if (features & 0x08) {
+		p = parse_io(p, q, &entry->io);
+		if (p == NULL)
+			return -EINVAL;
+	} else
+		entry->io.nwin = 0;
 
-    /* I/O window options */
-    if (features & 0x08) {
-	p = parse_io(p, q, &entry->io);
-	if (p == NULL)
-		return -EINVAL;
-    } else
-	entry->io.nwin = 0;
+	/* Interrupt options */
+	if (features & 0x10) {
+		p = parse_irq(p, q, &entry->irq);
+		if (p == NULL)
+			return -EINVAL;
+	} else
+		entry->irq.IRQInfo1 = 0;
 
-    /* Interrupt options */
-    if (features & 0x10) {
-	p = parse_irq(p, q, &entry->irq);
-	if (p == NULL)
-		return -EINVAL;
-    } else
-	entry->irq.IRQInfo1 = 0;
+	switch (features & 0x60) {
+	case 0x00:
+		entry->mem.nwin = 0;
+		break;
+	case 0x20:
+		entry->mem.nwin = 1;
+		entry->mem.win[0].len = get_unaligned_le16(p) << 8;
+		entry->mem.win[0].card_addr = 0;
+		entry->mem.win[0].host_addr = 0;
+		p += 2;
+		if (p > q)
+			return -EINVAL;
+		break;
+	case 0x40:
+		entry->mem.nwin = 1;
+		entry->mem.win[0].len = get_unaligned_le16(p) << 8;
+		entry->mem.win[0].card_addr = get_unaligned_le16(p + 2) << 8;
+		entry->mem.win[0].host_addr = 0;
+		p += 4;
+		if (p > q)
+			return -EINVAL;
+		break;
+	case 0x60:
+		p = parse_mem(p, q, &entry->mem);
+		if (p == NULL)
+			return -EINVAL;
+		break;
+	}
 
-    switch (features & 0x60) {
-    case 0x00:
-	entry->mem.nwin = 0;
-	break;
-    case 0x20:
-	entry->mem.nwin = 1;
-	entry->mem.win[0].len = get_unaligned_le16(p) << 8;
-	entry->mem.win[0].card_addr = 0;
-	entry->mem.win[0].host_addr = 0;
-	p += 2;
-	if (p > q)
-		return -EINVAL;
-	break;
-    case 0x40:
-	entry->mem.nwin = 1;
-	entry->mem.win[0].len = get_unaligned_le16(p) << 8;
-	entry->mem.win[0].card_addr = get_unaligned_le16(p + 2) << 8;
-	entry->mem.win[0].host_addr = 0;
-	p += 4;
-	if (p > q)
-		return -EINVAL;
-	break;
-    case 0x60:
-	p = parse_mem(p, q, &entry->mem);
-	if (p == NULL)
-		return -EINVAL;
-	break;
-    }
+	/* Misc features */
+	if (features & 0x80) {
+		if (p == q)
+			return -EINVAL;
+		entry->flags |= (*p << 8);
+		while (*p & 0x80)
+			if (++p == q)
+				return -EINVAL;
+		p++;
+	}
 
-    /* Misc features */
-    if (features & 0x80) {
-	if (p == q)
-		return -EINVAL;
-	entry->flags |= (*p << 8);
-	while (*p & 0x80)
-	    if (++p == q)
-		    return -EINVAL;
-	p++;
-    }
+	entry->subtuples = q-p;
 
-    entry->subtuples = q-p;
-
-    return 0;
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_device_geo(tuple_t *tuple, cistpl_device_geo_t *geo)
 {
-    u_char *p, *q;
-    int n;
+	u_char *p, *q;
+	int n;
 
-    p = (u_char *)tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = (u_char *)tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    for (n = 0; n < CISTPL_MAX_DEVICES; n++) {
-	if (p > q-6)
-		break;
-	geo->geo[n].buswidth = p[0];
-	geo->geo[n].erase_block = 1 << (p[1]-1);
-	geo->geo[n].read_block  = 1 << (p[2]-1);
-	geo->geo[n].write_block = 1 << (p[3]-1);
-	geo->geo[n].partition   = 1 << (p[4]-1);
-	geo->geo[n].interleave  = 1 << (p[5]-1);
-	p += 6;
-    }
-    geo->ngeo = n;
-    return 0;
+	for (n = 0; n < CISTPL_MAX_DEVICES; n++) {
+		if (p > q-6)
+			break;
+		geo->geo[n].buswidth = p[0];
+		geo->geo[n].erase_block = 1 << (p[1]-1);
+		geo->geo[n].read_block  = 1 << (p[2]-1);
+		geo->geo[n].write_block = 1 << (p[3]-1);
+		geo->geo[n].partition   = 1 << (p[4]-1);
+		geo->geo[n].interleave  = 1 << (p[5]-1);
+		p += 6;
+	}
+	geo->ngeo = n;
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_vers_2(tuple_t *tuple, cistpl_vers_2_t *v2)
 {
-    u_char *p, *q;
+	u_char *p, *q;
 
-    if (tuple->TupleDataLen < 10)
-	return -EINVAL;
+	if (tuple->TupleDataLen < 10)
+		return -EINVAL;
 
-    p = tuple->TupleData;
-    q = p + tuple->TupleDataLen;
+	p = tuple->TupleData;
+	q = p + tuple->TupleDataLen;
 
-    v2->vers = p[0];
-    v2->comply = p[1];
-    v2->dindex = get_unaligned_le16(p + 2);
-    v2->vspec8 = p[6];
-    v2->vspec9 = p[7];
-    v2->nhdr = p[8];
-    p += 9;
-    return parse_strings(p, q, 2, v2->str, &v2->vendor, NULL);
+	v2->vers = p[0];
+	v2->comply = p[1];
+	v2->dindex = get_unaligned_le16(p + 2);
+	v2->vspec8 = p[6];
+	v2->vspec9 = p[7];
+	v2->nhdr = p[8];
+	p += 9;
+	return parse_strings(p, q, 2, v2->str, &v2->vendor, NULL);
 }
 
-/*====================================================================*/
 
 static int parse_org(tuple_t *tuple, cistpl_org_t *org)
 {
-    u_char *p, *q;
-    int i;
+	u_char *p, *q;
+	int i;
 
-    p = tuple->TupleData;
-    q = p + tuple->TupleDataLen;
-    if (p == q)
-	    return -EINVAL;
-    org->data_org = *p;
-    if (++p == q)
-	    return -EINVAL;
-    for (i = 0; i < 30; i++) {
-	org->desc[i] = *p;
-	if (*p == '\0')
-		break;
+	p = tuple->TupleData;
+	q = p + tuple->TupleDataLen;
+	if (p == q)
+		return -EINVAL;
+	org->data_org = *p;
 	if (++p == q)
 		return -EINVAL;
-    }
-    return 0;
+	for (i = 0; i < 30; i++) {
+		org->desc[i] = *p;
+		if (*p == '\0')
+			break;
+		if (++p == q)
+			return -EINVAL;
+	}
+	return 0;
 }
 
-/*====================================================================*/
 
 static int parse_format(tuple_t *tuple, cistpl_format_t *fmt)
 {
-    u_char *p;
+	u_char *p;
 
-    if (tuple->TupleDataLen < 10)
-	return -EINVAL;
+	if (tuple->TupleDataLen < 10)
+		return -EINVAL;
 
-    p = tuple->TupleData;
+	p = tuple->TupleData;
 
-    fmt->type = p[0];
-    fmt->edc = p[1];
-    fmt->offset = get_unaligned_le32(p + 2);
-    fmt->length = get_unaligned_le32(p + 6);
+	fmt->type = p[0];
+	fmt->edc = p[1];
+	fmt->offset = get_unaligned_le32(p + 2);
+	fmt->length = get_unaligned_le32(p + 6);
 
-    return 0;
+	return 0;
 }
 
-/*====================================================================*/
 
 int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse)
 {
-    int ret = 0;
+	int ret = 0;
 
-    if (tuple->TupleDataLen > tuple->TupleDataMax)
-	return -EINVAL;
-    switch (tuple->TupleCode) {
-    case CISTPL_DEVICE:
-    case CISTPL_DEVICE_A:
-	ret = parse_device(tuple, &parse->device);
-	break;
-    case CISTPL_CHECKSUM:
-	ret = parse_checksum(tuple, &parse->checksum);
-	break;
-    case CISTPL_LONGLINK_A:
-    case CISTPL_LONGLINK_C:
-	ret = parse_longlink(tuple, &parse->longlink);
-	break;
-    case CISTPL_LONGLINK_MFC:
-	ret = parse_longlink_mfc(tuple, &parse->longlink_mfc);
-	break;
-    case CISTPL_VERS_1:
-	ret = parse_vers_1(tuple, &parse->version_1);
-	break;
-    case CISTPL_ALTSTR:
-	ret = parse_altstr(tuple, &parse->altstr);
-	break;
-    case CISTPL_JEDEC_A:
-    case CISTPL_JEDEC_C:
-	ret = parse_jedec(tuple, &parse->jedec);
-	break;
-    case CISTPL_MANFID:
-	ret = parse_manfid(tuple, &parse->manfid);
-	break;
-    case CISTPL_FUNCID:
-	ret = parse_funcid(tuple, &parse->funcid);
-	break;
-    case CISTPL_FUNCE:
-	ret = parse_funce(tuple, &parse->funce);
-	break;
-    case CISTPL_CONFIG:
-	ret = parse_config(tuple, &parse->config);
-	break;
-    case CISTPL_CFTABLE_ENTRY:
-	ret = parse_cftable_entry(tuple, &parse->cftable_entry);
-	break;
-    case CISTPL_DEVICE_GEO:
-    case CISTPL_DEVICE_GEO_A:
-	ret = parse_device_geo(tuple, &parse->device_geo);
-	break;
-    case CISTPL_VERS_2:
-	ret = parse_vers_2(tuple, &parse->vers_2);
-	break;
-    case CISTPL_ORG:
-	ret = parse_org(tuple, &parse->org);
-	break;
-    case CISTPL_FORMAT:
-    case CISTPL_FORMAT_A:
-	ret = parse_format(tuple, &parse->format);
-	break;
-    case CISTPL_NO_LINK:
-    case CISTPL_LINKTARGET:
-	ret = 0;
-	break;
-    default:
-	ret = -EINVAL;
-	break;
-    }
-    if (ret)
-	    pr_debug("parse_tuple failed %d\n", ret);
-    return ret;
+	if (tuple->TupleDataLen > tuple->TupleDataMax)
+		return -EINVAL;
+	switch (tuple->TupleCode) {
+	case CISTPL_DEVICE:
+	case CISTPL_DEVICE_A:
+		ret = parse_device(tuple, &parse->device);
+		break;
+	case CISTPL_CHECKSUM:
+		ret = parse_checksum(tuple, &parse->checksum);
+		break;
+	case CISTPL_LONGLINK_A:
+	case CISTPL_LONGLINK_C:
+		ret = parse_longlink(tuple, &parse->longlink);
+		break;
+	case CISTPL_LONGLINK_MFC:
+		ret = parse_longlink_mfc(tuple, &parse->longlink_mfc);
+		break;
+	case CISTPL_VERS_1:
+		ret = parse_vers_1(tuple, &parse->version_1);
+		break;
+	case CISTPL_ALTSTR:
+		ret = parse_altstr(tuple, &parse->altstr);
+		break;
+	case CISTPL_JEDEC_A:
+	case CISTPL_JEDEC_C:
+		ret = parse_jedec(tuple, &parse->jedec);
+		break;
+	case CISTPL_MANFID:
+		ret = parse_manfid(tuple, &parse->manfid);
+		break;
+	case CISTPL_FUNCID:
+		ret = parse_funcid(tuple, &parse->funcid);
+		break;
+	case CISTPL_FUNCE:
+		ret = parse_funce(tuple, &parse->funce);
+		break;
+	case CISTPL_CONFIG:
+		ret = parse_config(tuple, &parse->config);
+		break;
+	case CISTPL_CFTABLE_ENTRY:
+		ret = parse_cftable_entry(tuple, &parse->cftable_entry);
+		break;
+	case CISTPL_DEVICE_GEO:
+	case CISTPL_DEVICE_GEO_A:
+		ret = parse_device_geo(tuple, &parse->device_geo);
+		break;
+	case CISTPL_VERS_2:
+		ret = parse_vers_2(tuple, &parse->vers_2);
+		break;
+	case CISTPL_ORG:
+		ret = parse_org(tuple, &parse->org);
+		break;
+	case CISTPL_FORMAT:
+	case CISTPL_FORMAT_A:
+		ret = parse_format(tuple, &parse->format);
+		break;
+	case CISTPL_NO_LINK:
+	case CISTPL_LINKTARGET:
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	if (ret)
+		pr_debug("parse_tuple failed %d\n", ret);
+	return ret;
 }
 EXPORT_SYMBOL(pcmcia_parse_tuple);
 
-/*======================================================================
 
-    This is used internally by Card Services to look up CIS stuff.
-
-======================================================================*/
-
-int pccard_read_tuple(struct pcmcia_socket *s, unsigned int function, cisdata_t code, void *parse)
+/**
+ * pccard_read_tuple() - internal CIS tuple access
+ * @s:		the struct pcmcia_socket where the card is inserted
+ * @function:	the device function we loop for
+ * @code:	which CIS code shall we look for?
+ * @parse:	buffer where the tuple shall be parsed (or NULL, if no parse)
+ *
+ * pccard_read_tuple() reads out one tuple and attempts to parse it
+ */
+int pccard_read_tuple(struct pcmcia_socket *s, unsigned int function,
+		cisdata_t code, void *parse)
 {
-    tuple_t tuple;
-    cisdata_t *buf;
-    int ret;
+	tuple_t tuple;
+	cisdata_t *buf;
+	int ret;
 
-    buf = kmalloc(256, GFP_KERNEL);
-    if (buf == NULL) {
-	    dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n");
-	    return -ENOMEM;
-    }
-    tuple.DesiredTuple = code;
-    tuple.Attributes = 0;
-    if (function == BIND_FN_ALL)
-	    tuple.Attributes = TUPLE_RETURN_COMMON;
-    ret = pccard_get_first_tuple(s, function, &tuple);
-    if (ret != 0)
-	    goto done;
-    tuple.TupleData = buf;
-    tuple.TupleOffset = 0;
-    tuple.TupleDataMax = 255;
-    ret = pccard_get_tuple_data(s, &tuple);
-    if (ret != 0)
-	    goto done;
-    ret = pcmcia_parse_tuple(&tuple, parse);
+	buf = kmalloc(256, GFP_KERNEL);
+	if (buf == NULL) {
+		dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n");
+		return -ENOMEM;
+	}
+	tuple.DesiredTuple = code;
+	tuple.Attributes = 0;
+	if (function == BIND_FN_ALL)
+		tuple.Attributes = TUPLE_RETURN_COMMON;
+	ret = pccard_get_first_tuple(s, function, &tuple);
+	if (ret != 0)
+		goto done;
+	tuple.TupleData = buf;
+	tuple.TupleOffset = 0;
+	tuple.TupleDataMax = 255;
+	ret = pccard_get_tuple_data(s, &tuple);
+	if (ret != 0)
+		goto done;
+	ret = pcmcia_parse_tuple(&tuple, parse);
 done:
-    kfree(buf);
-    return ret;
+	kfree(buf);
+	return ret;
 }
 
 
diff --git a/drivers/pcmcia/db1xxx_ss.c b/drivers/pcmcia/db1xxx_ss.c
index 3889cf0..9254ab0 100644
--- a/drivers/pcmcia/db1xxx_ss.c
+++ b/drivers/pcmcia/db1xxx_ss.c
@@ -42,7 +42,6 @@
 	int		nr;		/* socket number */
 	void		*virt_io;
 
-	/* the "pseudo" addresses of the PCMCIA space. */
 	phys_addr_t	phys_io;
 	phys_addr_t	phys_attr;
 	phys_addr_t	phys_mem;
@@ -437,7 +436,7 @@
 	 * This includes IRQs for Carddetection/ejection, the card
 	 *  itself and optional status change detection.
 	 * Also, the memory areas covered by a socket.  For these
-	 *  we require the 32bit "pseudo" addresses (see the au1000.h
+	 *  we require the real 36bit addresses (see the au1000.h
 	 *  header for more information).
 	 */
 
@@ -459,11 +458,7 @@
 
 	ret = -ENODEV;
 
-	/*
-	 * pseudo-attr:  The 32bit address of the PCMCIA attribute space
-	 * for this socket (usually the 36bit address shifted 4 to the
-	 * right).
-	 */
+	/* 36bit PCMCIA Attribute area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-attr");
 	if (!r) {
 		printk(KERN_ERR "pcmcia%d has no 'pseudo-attr' resource!\n",
@@ -472,10 +467,7 @@
 	}
 	sock->phys_attr = r->start;
 
-	/*
-	 * pseudo-mem:  The 32bit address of the PCMCIA memory space for
-	 * this socket (usually the 36bit address shifted 4 to the right)
-	 */
+	/* 36bit PCMCIA Memory area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-mem");
 	if (!r) {
 		printk(KERN_ERR "pcmcia%d has no 'pseudo-mem' resource!\n",
@@ -484,10 +476,7 @@
 	}
 	sock->phys_mem = r->start;
 
-	/*
-	 * pseudo-io:  The 32bit address of the PCMCIA IO space for this
-	 * socket (usually the 36bit address shifted 4 to the right).
-	 */
+	/* 36bit PCMCIA IO area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-io");
 	if (!r) {
 		printk(KERN_ERR "pcmcia%d has no 'pseudo-io' resource!\n",
diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c
index e1741cd..7c20491 100644
--- a/drivers/pcmcia/pd6729.c
+++ b/drivers/pcmcia/pd6729.c
@@ -48,23 +48,13 @@
  *     Specifies the interrupt delivery mode.  The default (1) is to use PCI
  *     interrupts; a value of 0 selects ISA interrupts. This must be set for
  *     correct operation of PCI card readers.
- *
- *  irq_list=i,j,...
- *     This list limits the set of interrupts that can be used by PCMCIA
- *     cards.
- *     The default list is 3,4,5,7,9,10,11.
- *     (irq_list parameter is not used, if irq_mode = 1)
  */
 
 static int irq_mode = 1; /* 0 = ISA interrupt, 1 = PCI interrupt */
-static int irq_list[16];
-static unsigned int irq_list_count = 0;
 
 module_param(irq_mode, int, 0444);
-module_param_array(irq_list, int, &irq_list_count, 0444);
 MODULE_PARM_DESC(irq_mode,
 		"interrupt delivery mode. 0 = ISA, 1 = PCI. default is 1");
-MODULE_PARM_DESC(irq_list, "interrupts that can be used by PCMCIA cards");
 
 static DEFINE_SPINLOCK(port_lock);
 
@@ -605,13 +595,7 @@
 		return 0;
 	}
 
-	if (irq_list_count == 0)
-		mask0 = 0xffff;
-	else
-		for (i = mask0 = 0; i < irq_list_count; i++)
-			mask0 |= (1<<irq_list[i]);
-
-	mask0 &= PD67_MASK;
+	mask0 = PD67_MASK;
 
 	/* just find interrupts that aren't in use */
 	for (i = 0; i < 16; i++)
diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c
index e6f7d41..452c83b 100644
--- a/drivers/pcmcia/rsrc_mgr.c
+++ b/drivers/pcmcia/rsrc_mgr.c
@@ -79,9 +79,8 @@
 
 #ifdef CONFIG_X86
 	if (res->flags & IORESOURCE_IO) {
-		if (start & 0x300) {
+		if (start & 0x300)
 			start = (start + 0x3ff) & ~0x3ff;
-		}
 	}
 #endif
 
diff --git a/drivers/pcmcia/xxs1500_ss.c b/drivers/pcmcia/xxs1500_ss.c
index 61560cd..f9009d3 100644
--- a/drivers/pcmcia/xxs1500_ss.c
+++ b/drivers/pcmcia/xxs1500_ss.c
@@ -218,11 +218,7 @@
 
 	ret = -ENODEV;
 
-	/*
-	 * pseudo-attr:  The 32bit address of the PCMCIA attribute space
-	 * for this socket (usually the 36bit address shifted 4 to the
-	 * right).
-	 */
+	/* 36bit PCMCIA Attribute area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-attr");
 	if (!r) {
 		dev_err(&pdev->dev, "missing 'pcmcia-attr' resource!\n");
@@ -230,10 +226,7 @@
 	}
 	sock->phys_attr = r->start;
 
-	/*
-	 * pseudo-mem:  The 32bit address of the PCMCIA memory space for
-	 * this socket (usually the 36bit address shifted 4 to the right)
-	 */
+	/* 36bit PCMCIA Memory area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-mem");
 	if (!r) {
 		dev_err(&pdev->dev, "missing 'pcmcia-mem' resource!\n");
@@ -241,10 +234,7 @@
 	}
 	sock->phys_mem = r->start;
 
-	/*
-	 * pseudo-io:  The 32bit address of the PCMCIA IO space for this
-	 * socket (usually the 36bit address shifted 4 to the right).
-	 */
+	/* 36bit PCMCIA IO area address */
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcmcia-io");
 	if (!r) {
 		dev_err(&pdev->dev, "missing 'pcmcia-io' resource!\n");
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index b85375f..967c766 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -1408,10 +1408,10 @@
 	CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7510, TI12XX),
 	CB_ID(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_7610, TI12XX),
 
-	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_710, TI12XX),
-	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_712, TI12XX),
-	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_720, TI12XX),
-	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_722, TI12XX),
+	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_710, ENE),
+	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_712, ENE),
+	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_720, ENE),
+	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_722, ENE),
 	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1211, ENE),
 	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1225, ENE),
 	CB_ID(PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_1410, ENE),
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index d4b3d67..bf14672 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -98,10 +98,10 @@
 	  Say Y to enable support for battery measured by WM97xx aux port.
 
 config BATTERY_BQ27x00
-	tristate "BQ27200 battery driver"
+	tristate "BQ27x00 battery driver"
 	depends on I2C
 	help
-	  Say Y here to enable support for batteries with BQ27200(I2C) chip.
+	  Say Y here to enable support for batteries with BQ27x00 (I2C) chips.
 
 config BATTERY_DA9030
 	tristate "DA9030 battery driver"
diff --git a/drivers/power/bq27x00_battery.c b/drivers/power/bq27x00_battery.c
index 62bb981..bece33e 100644
--- a/drivers/power/bq27x00_battery.c
+++ b/drivers/power/bq27x00_battery.c
@@ -26,13 +26,22 @@
 #include <linux/i2c.h>
 #include <asm/unaligned.h>
 
-#define DRIVER_VERSION			"1.0.0"
+#define DRIVER_VERSION			"1.1.0"
 
 #define BQ27x00_REG_TEMP		0x06
 #define BQ27x00_REG_VOLT		0x08
-#define BQ27x00_REG_RSOC		0x0B /* Relative State-of-Charge */
 #define BQ27x00_REG_AI			0x14
 #define BQ27x00_REG_FLAGS		0x0A
+#define BQ27x00_REG_TTE			0x16
+#define BQ27x00_REG_TTF			0x18
+#define BQ27x00_REG_TTECP		0x26
+
+#define BQ27000_REG_RSOC		0x0B /* Relative State-of-Charge */
+#define BQ27000_FLAG_CHGS		BIT(7)
+
+#define BQ27500_REG_SOC			0x2c
+#define BQ27500_FLAG_DSC		BIT(0)
+#define BQ27500_FLAG_FC			BIT(9)
 
 /* If the system has several batteries we need a different name for each
  * of them...
@@ -46,25 +55,28 @@
 		struct bq27x00_device_info *di);
 };
 
+enum bq27x00_chip { BQ27000, BQ27500 };
+
 struct bq27x00_device_info {
 	struct device 		*dev;
 	int			id;
-	int			voltage_uV;
-	int			current_uA;
-	int			temp_C;
-	int			charge_rsoc;
 	struct bq27x00_access_methods	*bus;
 	struct power_supply	bat;
+	enum bq27x00_chip	chip;
 
 	struct i2c_client	*client;
 };
 
 static enum power_supply_property bq27x00_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_PRESENT,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CAPACITY,
 	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
+	POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
 };
 
 /*
@@ -74,16 +86,11 @@
 static int bq27x00_read(u8 reg, int *rt_value, int b_single,
 			struct bq27x00_device_info *di)
 {
-	int ret;
-
-	ret = di->bus->read(reg, rt_value, b_single, di);
-	*rt_value = be16_to_cpu(*rt_value);
-
-	return ret;
+	return di->bus->read(reg, rt_value, b_single, di);
 }
 
 /*
- * Return the battery temperature in Celsius degrees
+ * Return the battery temperature in tenths of degree Celsius
  * Or < 0 if something fails.
  */
 static int bq27x00_battery_temperature(struct bq27x00_device_info *di)
@@ -97,7 +104,10 @@
 		return ret;
 	}
 
-	return (temp >> 2) - 273;
+	if (di->chip == BQ27500)
+		return temp - 2731;
+	else
+		return ((temp >> 2) - 273) * 10;
 }
 
 /*
@@ -115,7 +125,7 @@
 		return ret;
 	}
 
-	return volt;
+	return volt * 1000;
 }
 
 /*
@@ -134,16 +144,23 @@
 		dev_err(di->dev, "error reading current\n");
 		return 0;
 	}
-	ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
-	if (ret < 0) {
-		dev_err(di->dev, "error reading flags\n");
-		return 0;
+
+	if (di->chip == BQ27500) {
+		/* bq27500 returns signed value */
+		curr = (int)(s16)curr;
+	} else {
+		ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
+		if (ret < 0) {
+			dev_err(di->dev, "error reading flags\n");
+			return 0;
+		}
+		if (flags & BQ27000_FLAG_CHGS) {
+			dev_dbg(di->dev, "negative current!\n");
+			curr = -curr;
+		}
 	}
-	if ((flags & (1 << 7)) != 0) {
-		dev_dbg(di->dev, "negative current!\n");
-		return -curr;
-	}
-	return curr;
+
+	return curr * 1000;
 }
 
 /*
@@ -155,13 +172,70 @@
 	int ret;
 	int rsoc = 0;
 
-	ret = bq27x00_read(BQ27x00_REG_RSOC, &rsoc, 1, di);
+	if (di->chip == BQ27500)
+		ret = bq27x00_read(BQ27500_REG_SOC, &rsoc, 0, di);
+	else
+		ret = bq27x00_read(BQ27000_REG_RSOC, &rsoc, 1, di);
 	if (ret) {
 		dev_err(di->dev, "error reading relative State-of-Charge\n");
 		return ret;
 	}
 
-	return rsoc >> 8;
+	return rsoc;
+}
+
+static int bq27x00_battery_status(struct bq27x00_device_info *di,
+				  union power_supply_propval *val)
+{
+	int flags = 0;
+	int status;
+	int ret;
+
+	ret = bq27x00_read(BQ27x00_REG_FLAGS, &flags, 0, di);
+	if (ret < 0) {
+		dev_err(di->dev, "error reading flags\n");
+		return ret;
+	}
+
+	if (di->chip == BQ27500) {
+		if (flags & BQ27500_FLAG_FC)
+			status = POWER_SUPPLY_STATUS_FULL;
+		else if (flags & BQ27500_FLAG_DSC)
+			status = POWER_SUPPLY_STATUS_DISCHARGING;
+		else
+			status = POWER_SUPPLY_STATUS_CHARGING;
+	} else {
+		if (flags & BQ27000_FLAG_CHGS)
+			status = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			status = POWER_SUPPLY_STATUS_DISCHARGING;
+	}
+
+	val->intval = status;
+	return 0;
+}
+
+/*
+ * Read a time register.
+ * Return < 0 if something fails.
+ */
+static int bq27x00_battery_time(struct bq27x00_device_info *di, int reg,
+				union power_supply_propval *val)
+{
+	int tval = 0;
+	int ret;
+
+	ret = bq27x00_read(reg, &tval, 0, di);
+	if (ret) {
+		dev_err(di->dev, "error reading register %02x\n", reg);
+		return ret;
+	}
+
+	if (tval == 65535)
+		return -ENODATA;
+
+	val->intval = tval * 60;
+	return 0;
 }
 
 #define to_bq27x00_device_info(x) container_of((x), \
@@ -171,9 +245,13 @@
 					enum power_supply_property psp,
 					union power_supply_propval *val)
 {
+	int ret = 0;
 	struct bq27x00_device_info *di = to_bq27x00_device_info(psy);
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = bq27x00_battery_status(di, val);
+		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 	case POWER_SUPPLY_PROP_PRESENT:
 		val->intval = bq27x00_battery_voltage(di);
@@ -189,11 +267,20 @@
 	case POWER_SUPPLY_PROP_TEMP:
 		val->intval = bq27x00_battery_temperature(di);
 		break;
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
+		ret = bq27x00_battery_time(di, BQ27x00_REG_TTE, val);
+		break;
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
+		ret = bq27x00_battery_time(di, BQ27x00_REG_TTECP, val);
+		break;
+	case POWER_SUPPLY_PROP_TIME_TO_FULL_NOW:
+		ret = bq27x00_battery_time(di, BQ27x00_REG_TTF, val);
+		break;
 	default:
 		return -EINVAL;
 	}
 
-	return 0;
+	return ret;
 }
 
 static void bq27x00_powersupply_init(struct bq27x00_device_info *di)
@@ -206,10 +293,10 @@
 }
 
 /*
- * BQ27200 specific code
+ * i2c specific code
  */
 
-static int bq27200_read(u8 reg, int *rt_value, int b_single,
+static int bq27x00_read_i2c(u8 reg, int *rt_value, int b_single,
 			struct bq27x00_device_info *di)
 {
 	struct i2c_client *client = di->client;
@@ -238,7 +325,7 @@
 		err = i2c_transfer(client->adapter, msg, 1);
 		if (err >= 0) {
 			if (!b_single)
-				*rt_value = get_unaligned_be16(data);
+				*rt_value = get_unaligned_le16(data);
 			else
 				*rt_value = data[0];
 
@@ -248,7 +335,7 @@
 	return err;
 }
 
-static int bq27200_battery_probe(struct i2c_client *client,
+static int bq27x00_battery_probe(struct i2c_client *client,
 				 const struct i2c_device_id *id)
 {
 	char *name;
@@ -267,7 +354,7 @@
 	if (retval < 0)
 		return retval;
 
-	name = kasprintf(GFP_KERNEL, "bq27200-%d", num);
+	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
 	if (!name) {
 		dev_err(&client->dev, "failed to allocate device name\n");
 		retval = -ENOMEM;
@@ -281,6 +368,7 @@
 		goto batt_failed_2;
 	}
 	di->id = num;
+	di->chip = id->driver_data;
 
 	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
 	if (!bus) {
@@ -293,7 +381,7 @@
 	i2c_set_clientdata(client, di);
 	di->dev = &client->dev;
 	di->bat.name = name;
-	bus->read = &bq27200_read;
+	bus->read = &bq27x00_read_i2c;
 	di->bus = bus;
 	di->client = client;
 
@@ -323,7 +411,7 @@
 	return retval;
 }
 
-static int bq27200_battery_remove(struct i2c_client *client)
+static int bq27x00_battery_remove(struct i2c_client *client)
 {
 	struct bq27x00_device_info *di = i2c_get_clientdata(client);
 
@@ -344,27 +432,28 @@
  * Module stuff
  */
 
-static const struct i2c_device_id bq27200_id[] = {
-	{ "bq27200", 0 },
+static const struct i2c_device_id bq27x00_id[] = {
+	{ "bq27200", BQ27000 },	/* bq27200 is same as bq27000, but with i2c */
+	{ "bq27500", BQ27500 },
 	{},
 };
 
-static struct i2c_driver bq27200_battery_driver = {
+static struct i2c_driver bq27x00_battery_driver = {
 	.driver = {
-		.name = "bq27200-battery",
+		.name = "bq27x00-battery",
 	},
-	.probe = bq27200_battery_probe,
-	.remove = bq27200_battery_remove,
-	.id_table = bq27200_id,
+	.probe = bq27x00_battery_probe,
+	.remove = bq27x00_battery_remove,
+	.id_table = bq27x00_id,
 };
 
 static int __init bq27x00_battery_init(void)
 {
 	int ret;
 
-	ret = i2c_add_driver(&bq27200_battery_driver);
+	ret = i2c_add_driver(&bq27x00_battery_driver);
 	if (ret)
-		printk(KERN_ERR "Unable to register BQ27200 driver\n");
+		printk(KERN_ERR "Unable to register BQ27x00 driver\n");
 
 	return ret;
 }
@@ -372,7 +461,7 @@
 
 static void __exit bq27x00_battery_exit(void)
 {
-	i2c_del_driver(&bq27200_battery_driver);
+	i2c_del_driver(&bq27x00_battery_driver);
 }
 module_exit(bq27x00_battery_exit);
 
diff --git a/drivers/power/da9030_battery.c b/drivers/power/da9030_battery.c
index 3364198..a2e71f7 100644
--- a/drivers/power/da9030_battery.c
+++ b/drivers/power/da9030_battery.c
@@ -509,7 +509,7 @@
 
 	charger->master = pdev->dev.parent;
 
-	/* 10 seconds between monotor runs unless platfrom defines other
+	/* 10 seconds between monitor runs unless platform defines other
 	   interval */
 	charger->interval = msecs_to_jiffies(
 		(pdata->batmon_interval ? : 10) * 1000);
diff --git a/drivers/power/wm97xx_battery.c b/drivers/power/wm97xx_battery.c
index 6ea3cb5..23eed35 100644
--- a/drivers/power/wm97xx_battery.c
+++ b/drivers/power/wm97xx_battery.c
@@ -26,7 +26,7 @@
 
 static DEFINE_MUTEX(bat_lock);
 static struct work_struct bat_work;
-struct mutex work_lock;
+static struct mutex work_lock;
 static int bat_status = POWER_SUPPLY_STATUS_UNKNOWN;
 static struct wm97xx_batt_info *gpdata;
 static enum power_supply_property *prop;
@@ -203,7 +203,7 @@
 			goto err2;
 		ret = request_irq(gpio_to_irq(pdata->charge_gpio),
 				wm97xx_chrg_irq, IRQF_DISABLED,
-				"AC Detect", 0);
+				"AC Detect", dev);
 		if (ret)
 			goto err2;
 		props++;	/* POWER_SUPPLY_PROP_STATUS */
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 262f62e..834b484 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -27,6 +27,17 @@
 	help
 	  Say yes here to enable debugging support.
 
+config REGULATOR_DUMMY
+	bool "Provide a dummy regulator if regulator lookups fail"
+	help
+	  If this option is enabled then when a regulator lookup fails
+	  and the board has not specified that it has provided full
+	  constraints then the regulator core will provide an always
+	  enabled dummy regulator will be provided, allowing consumer
+	  drivers to continue.
+
+	  A warning will be generated when this substitution is done.
+
 config REGULATOR_FIXED_VOLTAGE
 	tristate "Fixed voltage regulator support"
 	help
@@ -69,6 +80,13 @@
 	  regulator via I2C bus. The provided regulator is suitable
 	  for PXA27x chips to control VCC_CORE and VCC_USIM voltages.
 
+config REGULATOR_MAX8649
+	tristate "Maxim 8649 voltage regulator"
+	depends on I2C
+	help
+	  This driver controls a Maxim 8649 voltage output regulator via
+	  I2C bus.
+
 config REGULATOR_MAX8660
 	tristate "Maxim 8660/8661 voltage regulator"
 	depends on I2C
@@ -91,19 +109,26 @@
 	  of PMIC devices.
 
 config REGULATOR_WM8350
-	tristate "Wolfson Microelectroncis WM8350 AudioPlus PMIC"
+	tristate "Wolfson Microelectronics WM8350 AudioPlus PMIC"
 	depends on MFD_WM8350
 	help
 	  This driver provides support for the voltage and current regulators
           of the WM8350 AudioPlus PMIC.
 
 config REGULATOR_WM8400
-	tristate "Wolfson Microelectroncis WM8400 AudioPlus PMIC"
+	tristate "Wolfson Microelectronics WM8400 AudioPlus PMIC"
 	depends on MFD_WM8400
 	help
 	  This driver provides support for the voltage regulators of the
 	  WM8400 AudioPlus PMIC.
 
+config REGULATOR_WM8994
+	tristate "Wolfson Microelectronics WM8994 CODEC"
+	depends on MFD_WM8994
+	help
+	  This driver provides support for the voltage regulators on the
+	  WM8994 CODEC.
+
 config REGULATOR_DA903X
 	tristate "Support regulators on Dialog Semiconductor DA9030/DA9034 PMIC"
 	depends on PMIC_DA903X
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index b3c806c..e845b66 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -9,15 +9,18 @@
 obj-$(CONFIG_REGULATOR_USERSPACE_CONSUMER) += userspace-consumer.o
 
 obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+obj-$(CONFIG_REGULATOR_DUMMY) += dummy.o
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o
+obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
 obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o
 obj-$(CONFIG_REGULATOR_WM831X) += wm831x-ldo.o
 obj-$(CONFIG_REGULATOR_WM8350) += wm8350-regulator.o
 obj-$(CONFIG_REGULATOR_WM8400) += wm8400-regulator.o
+obj-$(CONFIG_REGULATOR_WM8994) += wm8994-regulator.o
 obj-$(CONFIG_REGULATOR_DA903X)	+= da903x.o
 obj-$(CONFIG_REGULATOR_PCF50633) += pcf50633-regulator.o
 obj-$(CONFIG_REGULATOR_PCAP) += pcap-regulator.o
diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c
index b349db4..7de9509 100644
--- a/drivers/regulator/ab3100.c
+++ b/drivers/regulator/ab3100.c
@@ -561,7 +561,7 @@
  * for all the different regulators.
  */
 
-static int __init ab3100_regulators_probe(struct platform_device *pdev)
+static int __devinit ab3100_regulators_probe(struct platform_device *pdev)
 {
 	struct ab3100_platform_data *plfdata = pdev->dev.platform_data;
 	struct ab3100 *ab3100 = platform_get_drvdata(pdev);
@@ -641,7 +641,7 @@
 	return 0;
 }
 
-static int __exit ab3100_regulators_remove(struct platform_device *pdev)
+static int __devexit ab3100_regulators_remove(struct platform_device *pdev)
 {
 	int i;
 
@@ -659,7 +659,7 @@
 		.owner = THIS_MODULE,
 	},
 	.probe = ab3100_regulators_probe,
-	.remove = __exit_p(ab3100_regulators_remove),
+	.remove = __devexit_p(ab3100_regulators_remove),
 };
 
 static __init int ab3100_regulators_init(void)
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index b60a4c9..c7bbe30 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -19,10 +19,13 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/suspend.h>
+#include <linux/delay.h>
 #include <linux/regulator/consumer.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 
+#include "dummy.h"
+
 #define REGULATOR_VERSION "0.5"
 
 static DEFINE_MUTEX(regulator_list_mutex);
@@ -1084,6 +1087,13 @@
 	return NULL;
 }
 
+static int _regulator_get_enable_time(struct regulator_dev *rdev)
+{
+	if (!rdev->desc->ops->enable_time)
+		return 0;
+	return rdev->desc->ops->enable_time(rdev);
+}
+
 /* Internal regulator request function */
 static struct regulator *_regulator_get(struct device *dev, const char *id,
 					int exclusive)
@@ -1115,6 +1125,22 @@
 			goto found;
 		}
 	}
+
+#ifdef CONFIG_REGULATOR_DUMMY
+	if (!devname)
+		devname = "deviceless";
+
+	/* If the board didn't flag that it was fully constrained then
+	 * substitute in a dummy regulator so consumers can continue.
+	 */
+	if (!has_full_constraints) {
+		pr_warning("%s supply %s not found, using dummy regulator\n",
+			   devname, id);
+		rdev = dummy_regulator_rdev;
+		goto found;
+	}
+#endif
+
 	mutex_unlock(&regulator_list_mutex);
 	return regulator;
 
@@ -1251,7 +1277,7 @@
 /* locks held by regulator_enable() */
 static int _regulator_enable(struct regulator_dev *rdev)
 {
-	int ret;
+	int ret, delay;
 
 	/* do we need to enable the supply regulator first */
 	if (rdev->supply) {
@@ -1275,13 +1301,34 @@
 			if (!_regulator_can_change_status(rdev))
 				return -EPERM;
 
-			if (rdev->desc->ops->enable) {
-				ret = rdev->desc->ops->enable(rdev);
-				if (ret < 0)
-					return ret;
-			} else {
+			if (!rdev->desc->ops->enable)
 				return -EINVAL;
+
+			/* Query before enabling in case configuration
+			 * dependant.  */
+			ret = _regulator_get_enable_time(rdev);
+			if (ret >= 0) {
+				delay = ret;
+			} else {
+				printk(KERN_WARNING
+					"%s: enable_time() failed for %s: %d\n",
+					__func__, rdev_get_name(rdev),
+					ret);
+				delay = 0;
 			}
+
+			/* Allow the regulator to ramp; it would be useful
+			 * to extend this for bulk operations so that the
+			 * regulators can ramp together.  */
+			ret = rdev->desc->ops->enable(rdev);
+			if (ret < 0)
+				return ret;
+
+			if (delay >= 1000)
+				mdelay(delay / 1000);
+			else if (delay)
+				udelay(delay);
+
 		} else if (ret < 0) {
 			printk(KERN_ERR "%s: is_enabled() failed for %s: %d\n",
 			       __func__, rdev_get_name(rdev), ret);
@@ -1341,6 +1388,9 @@
 				       __func__, rdev_get_name(rdev));
 				return ret;
 			}
+
+			_notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE,
+					     NULL);
 		}
 
 		/* decrease our supplies ref count and disable if required */
@@ -1399,8 +1449,8 @@
 			return ret;
 		}
 		/* notify other consumers that power has been forced off */
-		_notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE,
-			NULL);
+		_notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE |
+			REGULATOR_EVENT_DISABLE, NULL);
 	}
 
 	/* decrease our supplies ref count and disable if required */
@@ -1434,9 +1484,9 @@
 
 static int _regulator_is_enabled(struct regulator_dev *rdev)
 {
-	/* sanity check */
+	/* If we don't know then assume that the regulator is always on */
 	if (!rdev->desc->ops->is_enabled)
-		return -EINVAL;
+		return 1;
 
 	return rdev->desc->ops->is_enabled(rdev);
 }
@@ -2451,8 +2501,15 @@
 
 static int __init regulator_init(void)
 {
+	int ret;
+
 	printk(KERN_INFO "regulator: core version %s\n", REGULATOR_VERSION);
-	return class_register(&regulator_class);
+
+	ret = class_register(&regulator_class);
+
+	regulator_dummy_init();
+
+	return ret;
 }
 
 /* init early to allow our consumers to complete system booting */
diff --git a/drivers/regulator/dummy.c b/drivers/regulator/dummy.c
new file mode 100644
index 0000000..c7410bd
--- /dev/null
+++ b/drivers/regulator/dummy.c
@@ -0,0 +1,66 @@
+/*
+ * dummy.c
+ *
+ * Copyright 2010 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This is useful for systems with mixed controllable and
+ * non-controllable regulators, as well as for allowing testing on
+ * systems with no controllable regulators.
+ */
+
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+
+#include "dummy.h"
+
+struct regulator_dev *dummy_regulator_rdev;
+
+static struct regulator_init_data dummy_initdata;
+
+static struct regulator_ops dummy_ops;
+
+static struct regulator_desc dummy_desc = {
+	.name = "dummy",
+	.id = -1,
+	.type = REGULATOR_VOLTAGE,
+	.owner = THIS_MODULE,
+	.ops = &dummy_ops,
+};
+
+static struct platform_device *dummy_pdev;
+
+void __init regulator_dummy_init(void)
+{
+	int ret;
+
+	dummy_pdev = platform_device_alloc("reg-dummy", -1);
+	if (!dummy_pdev) {
+		pr_err("Failed to allocate dummy regulator device\n");
+		return;
+	}
+
+	ret = platform_device_add(dummy_pdev);
+	if (ret != 0) {
+		pr_err("Failed to register dummy regulator device: %d\n", ret);
+		platform_device_put(dummy_pdev);
+		return;
+	}
+
+	dummy_regulator_rdev = regulator_register(&dummy_desc, NULL,
+						  &dummy_initdata, NULL);
+	if (IS_ERR(dummy_regulator_rdev)) {
+		ret = PTR_ERR(dummy_regulator_rdev);
+		pr_err("Failed to register regulator: %d\n", ret);
+		platform_device_unregister(dummy_pdev);
+		return;
+	}
+}
diff --git a/drivers/regulator/dummy.h b/drivers/regulator/dummy.h
new file mode 100644
index 0000000..3921c0e
--- /dev/null
+++ b/drivers/regulator/dummy.h
@@ -0,0 +1,31 @@
+/*
+ * dummy.h
+ *
+ * Copyright 2010 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This is useful for systems with mixed controllable and
+ * non-controllable regulators, as well as for allowing testing on
+ * systems with no controllable regulators.
+ */
+
+#ifndef _DUMMY_H
+#define _DUMMY_H
+
+struct regulator_dev;
+
+extern struct regulator_dev *dummy_regulator_rdev;
+
+#ifdef CONFIG_REGULATOR_DUMMY
+void __init regulator_dummy_init(void);
+#else
+static inline void regulator_dummy_init(void) { }
+#endif
+
+#endif
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index f9f516a..d11f762 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -24,14 +24,16 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/fixed.h>
 #include <linux/gpio.h>
+#include <linux/delay.h>
 
 struct fixed_voltage_data {
 	struct regulator_desc desc;
 	struct regulator_dev *dev;
 	int microvolts;
 	int gpio;
-	unsigned enable_high:1;
-	unsigned is_enabled:1;
+	unsigned startup_delay;
+	bool enable_high;
+	bool is_enabled;
 };
 
 static int fixed_voltage_is_enabled(struct regulator_dev *dev)
@@ -47,7 +49,7 @@
 
 	if (gpio_is_valid(data->gpio)) {
 		gpio_set_value_cansleep(data->gpio, data->enable_high);
-		data->is_enabled = 1;
+		data->is_enabled = true;
 	}
 
 	return 0;
@@ -59,12 +61,19 @@
 
 	if (gpio_is_valid(data->gpio)) {
 		gpio_set_value_cansleep(data->gpio, !data->enable_high);
-		data->is_enabled = 0;
+		data->is_enabled = false;
 	}
 
 	return 0;
 }
 
+static int fixed_voltage_enable_time(struct regulator_dev *dev)
+{
+	struct fixed_voltage_data *data = rdev_get_drvdata(dev);
+
+	return data->startup_delay;
+}
+
 static int fixed_voltage_get_voltage(struct regulator_dev *dev)
 {
 	struct fixed_voltage_data *data = rdev_get_drvdata(dev);
@@ -87,11 +96,12 @@
 	.is_enabled = fixed_voltage_is_enabled,
 	.enable = fixed_voltage_enable,
 	.disable = fixed_voltage_disable,
+	.enable_time = fixed_voltage_enable_time,
 	.get_voltage = fixed_voltage_get_voltage,
 	.list_voltage = fixed_voltage_list_voltage,
 };
 
-static int regulator_fixed_voltage_probe(struct platform_device *pdev)
+static int __devinit reg_fixed_voltage_probe(struct platform_device *pdev)
 {
 	struct fixed_voltage_config *config = pdev->dev.platform_data;
 	struct fixed_voltage_data *drvdata;
@@ -117,6 +127,7 @@
 
 	drvdata->microvolts = config->microvolts;
 	drvdata->gpio = config->gpio;
+	drvdata->startup_delay = config->startup_delay;
 
 	if (gpio_is_valid(config->gpio)) {
 		drvdata->enable_high = config->enable_high;
@@ -163,7 +174,7 @@
 		/* Regulator without GPIO control is considered
 		 * always enabled
 		 */
-		drvdata->is_enabled = 1;
+		drvdata->is_enabled = true;
 	}
 
 	drvdata->dev = regulator_register(&drvdata->desc, &pdev->dev,
@@ -191,7 +202,7 @@
 	return ret;
 }
 
-static int regulator_fixed_voltage_remove(struct platform_device *pdev)
+static int __devexit reg_fixed_voltage_remove(struct platform_device *pdev)
 {
 	struct fixed_voltage_data *drvdata = platform_get_drvdata(pdev);
 
@@ -205,10 +216,11 @@
 }
 
 static struct platform_driver regulator_fixed_voltage_driver = {
-	.probe		= regulator_fixed_voltage_probe,
-	.remove		= regulator_fixed_voltage_remove,
+	.probe		= reg_fixed_voltage_probe,
+	.remove		= __devexit_p(reg_fixed_voltage_remove),
 	.driver		= {
 		.name		= "reg-fixed-voltage",
+		.owner		= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index 4f33a0f..f5532ed 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -54,7 +54,7 @@
 #define LP3971_BUCK2_BASE 0x29
 #define LP3971_BUCK3_BASE 0x32
 
-const static int buck_base_addr[] = {
+static const int buck_base_addr[] = {
 	LP3971_BUCK1_BASE,
 	LP3971_BUCK2_BASE,
 	LP3971_BUCK3_BASE,
@@ -63,7 +63,7 @@
 #define LP3971_BUCK_TARGET_VOL1_REG(x) (buck_base_addr[x])
 #define LP3971_BUCK_TARGET_VOL2_REG(x) (buck_base_addr[x]+1)
 
-const static int buck_voltage_map[] = {
+static const int buck_voltage_map[] = {
 	   0,  800,  850,  900,  950, 1000, 1050, 1100,
 	1150, 1200, 1250, 1300, 1350, 1400, 1450, 1500,
 	1550, 1600, 1650, 1700, 1800, 1900, 2500, 2800,
@@ -96,17 +96,17 @@
 #define LDO_VOL_CONTR_SHIFT(x) ((x & 1) << 2)
 #define LDO_VOL_CONTR_MASK 0x0f
 
-const static int ldo45_voltage_map[] = {
+static const int ldo45_voltage_map[] = {
 	1000, 1050, 1100, 1150, 1200, 1250, 1300, 1350,
 	1400, 1500, 1800, 1900, 2500, 2800, 3000, 3300,
 };
 
-const static int ldo123_voltage_map[] = {
+static const int ldo123_voltage_map[] = {
 	1800, 1900, 2000, 2100, 2200, 2300, 2400, 2500,
 	2600, 2700, 2800, 2900, 3000, 3100, 3200, 3300,
 };
 
-const static int *ldo_voltage_map[] = {
+static const int *ldo_voltage_map[] = {
 	ldo123_voltage_map, /* LDO1 */
 	ldo123_voltage_map, /* LDO2 */
 	ldo123_voltage_map, /* LDO3 */
@@ -431,20 +431,20 @@
 	return ret;
 }
 
-static int setup_regulators(struct lp3971 *lp3971,
-	struct lp3971_platform_data *pdata)
+static int __devinit setup_regulators(struct lp3971 *lp3971,
+				      struct lp3971_platform_data *pdata)
 {
 	int i, err;
-	int num_regulators = pdata->num_regulators;
-	lp3971->num_regulators = num_regulators;
-	lp3971->rdev = kzalloc(sizeof(struct regulator_dev *) * num_regulators,
-		GFP_KERNEL);
+
+	lp3971->num_regulators = pdata->num_regulators;
+	lp3971->rdev = kcalloc(pdata->num_regulators,
+				sizeof(struct regulator_dev *), GFP_KERNEL);
 
 	/* Instantiate the regulators */
-	for (i = 0; i < num_regulators; i++) {
-		int id = pdata->regulators[i].id;
-		lp3971->rdev[i] = regulator_register(&regulators[id],
-			lp3971->dev, pdata->regulators[i].initdata, lp3971);
+	for (i = 0; i < pdata->num_regulators; i++) {
+		struct lp3971_regulator_subdev *reg = &pdata->regulators[i];
+		lp3971->rdev[i] = regulator_register(&regulators[reg->id],
+					lp3971->dev, reg->initdata, lp3971);
 
 		if (IS_ERR(lp3971->rdev[i])) {
 			err = PTR_ERR(lp3971->rdev[i]);
@@ -455,10 +455,10 @@
 	}
 
 	return 0;
+
 error:
-	for (i = 0; i < num_regulators; i++)
-		if (lp3971->rdev[i])
-			regulator_unregister(lp3971->rdev[i]);
+	while (--i >= 0)
+		regulator_unregister(lp3971->rdev[i]);
 	kfree(lp3971->rdev);
 	lp3971->rdev = NULL;
 	return err;
@@ -472,15 +472,17 @@
 	int ret;
 	u16 val;
 
-	lp3971 = kzalloc(sizeof(struct lp3971), GFP_KERNEL);
-	if (lp3971 == NULL) {
-		ret = -ENOMEM;
-		goto err;
+	if (!pdata) {
+		dev_dbg(&i2c->dev, "No platform init data supplied\n");
+		return -ENODEV;
 	}
 
+	lp3971 = kzalloc(sizeof(struct lp3971), GFP_KERNEL);
+	if (lp3971 == NULL)
+		return -ENOMEM;
+
 	lp3971->i2c = i2c;
 	lp3971->dev = &i2c->dev;
-	i2c_set_clientdata(i2c, lp3971);
 
 	mutex_init(&lp3971->io_lock);
 
@@ -493,19 +495,15 @@
 		goto err_detect;
 	}
 
-	if (pdata) {
-		ret = setup_regulators(lp3971, pdata);
-		if (ret < 0)
-			goto err_detect;
-	} else
-		dev_warn(lp3971->dev, "No platform init data supplied\n");
+	ret = setup_regulators(lp3971, pdata);
+	if (ret < 0)
+		goto err_detect;
 
+	i2c_set_clientdata(i2c, lp3971);
 	return 0;
 
 err_detect:
-	i2c_set_clientdata(i2c, NULL);
 	kfree(lp3971);
-err:
 	return ret;
 }
 
@@ -513,11 +511,13 @@
 {
 	struct lp3971 *lp3971 = i2c_get_clientdata(i2c);
 	int i;
-	for (i = 0; i < lp3971->num_regulators; i++)
-		if (lp3971->rdev[i])
-			regulator_unregister(lp3971->rdev[i]);
-	kfree(lp3971->rdev);
+
 	i2c_set_clientdata(i2c, NULL);
+
+	for (i = 0; i < lp3971->num_regulators; i++)
+		regulator_unregister(lp3971->rdev[i]);
+
+	kfree(lp3971->rdev);
 	kfree(lp3971);
 
 	return 0;
diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c
index 2c082d3..a49fc95 100644
--- a/drivers/regulator/max1586.c
+++ b/drivers/regulator/max1586.c
@@ -179,8 +179,8 @@
 	},
 };
 
-static int max1586_pmic_probe(struct i2c_client *client,
-			      const struct i2c_device_id *i2c_id)
+static int __devinit max1586_pmic_probe(struct i2c_client *client,
+					const struct i2c_device_id *i2c_id)
 {
 	struct regulator_dev **rdev;
 	struct max1586_platform_data *pdata = client->dev.platform_data;
@@ -235,7 +235,7 @@
 	return ret;
 }
 
-static int max1586_pmic_remove(struct i2c_client *client)
+static int __devexit max1586_pmic_remove(struct i2c_client *client)
 {
 	struct regulator_dev **rdev = i2c_get_clientdata(client);
 	int i;
@@ -257,9 +257,10 @@
 
 static struct i2c_driver max1586_pmic_driver = {
 	.probe = max1586_pmic_probe,
-	.remove = max1586_pmic_remove,
+	.remove = __devexit_p(max1586_pmic_remove),
 	.driver		= {
 		.name	= "max1586",
+		.owner	= THIS_MODULE,
 	},
 	.id_table	= max1586_id,
 };
diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c
new file mode 100644
index 0000000..3ebdf69
--- /dev/null
+++ b/drivers/regulator/max8649.c
@@ -0,0 +1,408 @@
+/*
+ * Regulators driver for Maxim max8649
+ *
+ * Copyright (C) 2009-2010 Marvell International Ltd.
+ *      Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/max8649.h>
+
+#define MAX8649_DCDC_VMIN	750000		/* uV */
+#define MAX8649_DCDC_VMAX	1380000		/* uV */
+#define MAX8649_DCDC_STEP	10000		/* uV */
+#define MAX8649_VOL_MASK	0x3f
+
+/* Registers */
+#define MAX8649_MODE0		0x00
+#define MAX8649_MODE1		0x01
+#define MAX8649_MODE2		0x02
+#define MAX8649_MODE3		0x03
+#define MAX8649_CONTROL		0x04
+#define MAX8649_SYNC		0x05
+#define MAX8649_RAMP		0x06
+#define MAX8649_CHIP_ID1	0x08
+#define MAX8649_CHIP_ID2	0x09
+
+/* Bits */
+#define MAX8649_EN_PD		(1 << 7)
+#define MAX8649_VID0_PD		(1 << 6)
+#define MAX8649_VID1_PD		(1 << 5)
+#define MAX8649_VID_MASK	(3 << 5)
+
+#define MAX8649_FORCE_PWM	(1 << 7)
+#define MAX8649_SYNC_EXTCLK	(1 << 6)
+
+#define MAX8649_EXT_MASK	(3 << 6)
+
+#define MAX8649_RAMP_MASK	(7 << 5)
+#define MAX8649_RAMP_DOWN	(1 << 1)
+
+struct max8649_regulator_info {
+	struct regulator_dev	*regulator;
+	struct i2c_client	*i2c;
+	struct device		*dev;
+	struct mutex		io_lock;
+
+	int		vol_reg;
+	unsigned	mode:2;	/* bit[1:0] = VID1, VID0 */
+	unsigned	extclk_freq:2;
+	unsigned	extclk:1;
+	unsigned	ramp_timing:3;
+	unsigned	ramp_down:1;
+};
+
+/* I2C operations */
+
+static inline int max8649_read_device(struct i2c_client *i2c,
+				      int reg, int bytes, void *dest)
+{
+	unsigned char data;
+	int ret;
+
+	data = (unsigned char)reg;
+	ret = i2c_master_send(i2c, &data, 1);
+	if (ret < 0)
+		return ret;
+	ret = i2c_master_recv(i2c, dest, bytes);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static inline int max8649_write_device(struct i2c_client *i2c,
+				       int reg, int bytes, void *src)
+{
+	unsigned char buf[bytes + 1];
+	int ret;
+
+	buf[0] = (unsigned char)reg;
+	memcpy(&buf[1], src, bytes);
+
+	ret = i2c_master_send(i2c, buf, bytes + 1);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+static int max8649_reg_read(struct i2c_client *i2c, int reg)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(i2c);
+	unsigned char data;
+	int ret;
+
+	mutex_lock(&info->io_lock);
+	ret = max8649_read_device(i2c, reg, 1, &data);
+	mutex_unlock(&info->io_lock);
+
+	if (ret < 0)
+		return ret;
+	return (int)data;
+}
+
+static int max8649_set_bits(struct i2c_client *i2c, int reg,
+			    unsigned char mask, unsigned char data)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(i2c);
+	unsigned char value;
+	int ret;
+
+	mutex_lock(&info->io_lock);
+	ret = max8649_read_device(i2c, reg, 1, &value);
+	if (ret < 0)
+		goto out;
+	value &= ~mask;
+	value |= data;
+	ret = max8649_write_device(i2c, reg, 1, &value);
+out:
+	mutex_unlock(&info->io_lock);
+	return ret;
+}
+
+static inline int check_range(int min_uV, int max_uV)
+{
+	if ((min_uV < MAX8649_DCDC_VMIN) || (max_uV > MAX8649_DCDC_VMAX)
+		|| (min_uV > max_uV))
+		return -EINVAL;
+	return 0;
+}
+
+static int max8649_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+	return (MAX8649_DCDC_VMIN + index * MAX8649_DCDC_STEP);
+}
+
+static int max8649_get_voltage(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned char data;
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret < 0)
+		return ret;
+	data = (unsigned char)ret & MAX8649_VOL_MASK;
+	return max8649_list_voltage(rdev, data);
+}
+
+static int max8649_set_voltage(struct regulator_dev *rdev,
+			       int min_uV, int max_uV)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned char data, mask;
+
+	if (check_range(min_uV, max_uV)) {
+		dev_err(info->dev, "invalid voltage range (%d, %d) uV\n",
+			min_uV, max_uV);
+		return -EINVAL;
+	}
+	data = (min_uV - MAX8649_DCDC_VMIN + MAX8649_DCDC_STEP - 1)
+		/ MAX8649_DCDC_STEP;
+	mask = MAX8649_VOL_MASK;
+
+	return max8649_set_bits(info->i2c, info->vol_reg, mask, data);
+}
+
+/* EN_PD means pulldown on EN input */
+static int max8649_enable(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD, 0);
+}
+
+/*
+ * Applied internal pulldown resistor on EN input pin.
+ * If pulldown EN pin outside, it would be better.
+ */
+static int max8649_disable(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD,
+				MAX8649_EN_PD);
+}
+
+static int max8649_is_enabled(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, MAX8649_CONTROL);
+	if (ret < 0)
+		return ret;
+	return !((unsigned char)ret & MAX8649_EN_PD);
+}
+
+static int max8649_enable_time(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int voltage, rate, ret;
+
+	/* get voltage */
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret < 0)
+		return ret;
+	ret &= MAX8649_VOL_MASK;
+	voltage = max8649_list_voltage(rdev, (unsigned char)ret); /* uV */
+
+	/* get rate */
+	ret = max8649_reg_read(info->i2c, MAX8649_RAMP);
+	if (ret < 0)
+		return ret;
+	ret = (ret & MAX8649_RAMP_MASK) >> 5;
+	rate = (32 * 1000) >> ret;	/* uV/uS */
+
+	return (voltage / rate);
+}
+
+static int max8649_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		max8649_set_bits(info->i2c, info->vol_reg, MAX8649_FORCE_PWM,
+				 MAX8649_FORCE_PWM);
+		break;
+	case REGULATOR_MODE_NORMAL:
+		max8649_set_bits(info->i2c, info->vol_reg,
+				 MAX8649_FORCE_PWM, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static unsigned int max8649_get_mode(struct regulator_dev *rdev)
+{
+	struct max8649_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret;
+
+	ret = max8649_reg_read(info->i2c, info->vol_reg);
+	if (ret & MAX8649_FORCE_PWM)
+		return REGULATOR_MODE_FAST;
+	return REGULATOR_MODE_NORMAL;
+}
+
+static struct regulator_ops max8649_dcdc_ops = {
+	.set_voltage	= max8649_set_voltage,
+	.get_voltage	= max8649_get_voltage,
+	.list_voltage	= max8649_list_voltage,
+	.enable		= max8649_enable,
+	.disable	= max8649_disable,
+	.is_enabled	= max8649_is_enabled,
+	.enable_time	= max8649_enable_time,
+	.set_mode	= max8649_set_mode,
+	.get_mode	= max8649_get_mode,
+
+};
+
+static struct regulator_desc dcdc_desc = {
+	.name		= "max8649",
+	.ops		= &max8649_dcdc_ops,
+	.type		= REGULATOR_VOLTAGE,
+	.n_voltages	= 1 << 6,
+	.owner		= THIS_MODULE,
+};
+
+static int __devinit max8649_regulator_probe(struct i2c_client *client,
+					     const struct i2c_device_id *id)
+{
+	struct max8649_platform_data *pdata = client->dev.platform_data;
+	struct max8649_regulator_info *info = NULL;
+	unsigned char data;
+	int ret;
+
+	info = kzalloc(sizeof(struct max8649_regulator_info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&client->dev, "No enough memory\n");
+		return -ENOMEM;
+	}
+
+	info->i2c = client;
+	info->dev = &client->dev;
+	mutex_init(&info->io_lock);
+	i2c_set_clientdata(client, info);
+
+	info->mode = pdata->mode;
+	switch (info->mode) {
+	case 0:
+		info->vol_reg = MAX8649_MODE0;
+		break;
+	case 1:
+		info->vol_reg = MAX8649_MODE1;
+		break;
+	case 2:
+		info->vol_reg = MAX8649_MODE2;
+		break;
+	case 3:
+		info->vol_reg = MAX8649_MODE3;
+		break;
+	default:
+		break;
+	}
+
+	ret = max8649_reg_read(info->i2c, MAX8649_CHIP_ID1);
+	if (ret < 0) {
+		dev_err(info->dev, "Failed to detect ID of MAX8649:%d\n",
+			ret);
+		goto out;
+	}
+	dev_info(info->dev, "Detected MAX8649 (ID:%x)\n", ret);
+
+	/* enable VID0 & VID1 */
+	max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_VID_MASK, 0);
+
+	/* enable/disable external clock synchronization */
+	info->extclk = pdata->extclk;
+	data = (info->extclk) ? MAX8649_SYNC_EXTCLK : 0;
+	max8649_set_bits(info->i2c, info->vol_reg, MAX8649_SYNC_EXTCLK, data);
+	if (info->extclk) {
+		/* set external clock frequency */
+		info->extclk_freq = pdata->extclk_freq;
+		max8649_set_bits(info->i2c, MAX8649_SYNC, MAX8649_EXT_MASK,
+				 info->extclk_freq);
+	}
+
+	if (pdata->ramp_timing) {
+		info->ramp_timing = pdata->ramp_timing;
+		max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_MASK,
+				 info->ramp_timing << 5);
+	}
+
+	info->ramp_down = pdata->ramp_down;
+	if (info->ramp_down) {
+		max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_DOWN,
+				 MAX8649_RAMP_DOWN);
+	}
+
+	info->regulator = regulator_register(&dcdc_desc, &client->dev,
+					     pdata->regulator, info);
+	if (IS_ERR(info->regulator)) {
+		dev_err(info->dev, "failed to register regulator %s\n",
+			dcdc_desc.name);
+		ret = PTR_ERR(info->regulator);
+		goto out;
+	}
+
+	dev_info(info->dev, "Max8649 regulator device is detected.\n");
+	return 0;
+out:
+	kfree(info);
+	return ret;
+}
+
+static int __devexit max8649_regulator_remove(struct i2c_client *client)
+{
+	struct max8649_regulator_info *info = i2c_get_clientdata(client);
+
+	if (info) {
+		if (info->regulator)
+			regulator_unregister(info->regulator);
+		kfree(info);
+	}
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id max8649_id[] = {
+	{ "max8649", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max8649_id);
+
+static struct i2c_driver max8649_driver = {
+	.probe		= max8649_regulator_probe,
+	.remove		= __devexit_p(max8649_regulator_remove),
+	.driver		= {
+		.name	= "max8649",
+	},
+	.id_table	= max8649_id,
+};
+
+static int __init max8649_init(void)
+{
+	return i2c_add_driver(&max8649_driver);
+}
+subsys_initcall(max8649_init);
+
+static void __exit max8649_exit(void)
+{
+	i2c_del_driver(&max8649_driver);
+}
+module_exit(max8649_exit);
+
+/* Module information */
+MODULE_DESCRIPTION("MAXIM 8649 voltage regulator driver");
+MODULE_AUTHOR("Haojian Zhuang <haojian.zhuang@marvell.com>");
+MODULE_LICENSE("GPL");
+
diff --git a/drivers/regulator/max8660.c b/drivers/regulator/max8660.c
index acc2fb7..f12f1bb 100644
--- a/drivers/regulator/max8660.c
+++ b/drivers/regulator/max8660.c
@@ -345,8 +345,8 @@
 	},
 };
 
-static int max8660_probe(struct i2c_client *client,
-			      const struct i2c_device_id *i2c_id)
+static int __devinit max8660_probe(struct i2c_client *client,
+				   const struct i2c_device_id *i2c_id)
 {
 	struct regulator_dev **rdev;
 	struct max8660_platform_data *pdata = client->dev.platform_data;
@@ -354,7 +354,7 @@
 	int boot_on, i, id, ret = -EINVAL;
 
 	if (pdata->num_subdevs > MAX8660_V_END) {
-		dev_err(&client->dev, "Too much regulators found!\n");
+		dev_err(&client->dev, "Too many regulators found!\n");
 		goto out;
 	}
 
@@ -462,7 +462,7 @@
 	return ret;
 }
 
-static int max8660_remove(struct i2c_client *client)
+static int __devexit max8660_remove(struct i2c_client *client)
 {
 	struct regulator_dev **rdev = i2c_get_clientdata(client);
 	int i;
@@ -485,9 +485,10 @@
 
 static struct i2c_driver max8660_driver = {
 	.probe = max8660_probe,
-	.remove = max8660_remove,
+	.remove = __devexit_p(max8660_remove),
 	.driver		= {
 		.name	= "max8660",
+		.owner	= THIS_MODULE,
 	},
 	.id_table	= max8660_id,
 };
diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c
index 39c4953..f7b8184 100644
--- a/drivers/regulator/mc13783-regulator.c
+++ b/drivers/regulator/mc13783-regulator.c
@@ -2,6 +2,7 @@
  * Regulator Driver for Freescale MC13783 PMIC
  *
  * Copyright (C) 2008 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2009 Alberto Panizzo <maramaopercheseimorto@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -16,11 +17,44 @@
 #include <linux/init.h>
 #include <linux/err.h>
 
-#define MC13783_REG_SWITCHERS4			28
-#define MC13783_REG_SWITCHERS4_PLLEN			(1 << 18)
-
 #define MC13783_REG_SWITCHERS5			29
 #define MC13783_REG_SWITCHERS5_SW3EN			(1 << 20)
+#define MC13783_REG_SWITCHERS5_SW3VSEL			18
+#define MC13783_REG_SWITCHERS5_SW3VSEL_M		(3 << 18)
+
+#define MC13783_REG_REGULATORSETTING0		30
+#define MC13783_REG_REGULATORSETTING0_VIOLOVSEL		2
+#define MC13783_REG_REGULATORSETTING0_VDIGVSEL		4
+#define MC13783_REG_REGULATORSETTING0_VGENVSEL		6
+#define MC13783_REG_REGULATORSETTING0_VRFDIGVSEL	9
+#define MC13783_REG_REGULATORSETTING0_VRFREFVSEL	11
+#define MC13783_REG_REGULATORSETTING0_VRFCPVSEL		13
+#define MC13783_REG_REGULATORSETTING0_VSIMVSEL		14
+#define MC13783_REG_REGULATORSETTING0_VESIMVSEL		15
+#define MC13783_REG_REGULATORSETTING0_VCAMVSEL		16
+
+#define MC13783_REG_REGULATORSETTING0_VIOLOVSEL_M	(3 << 2)
+#define MC13783_REG_REGULATORSETTING0_VDIGVSEL_M	(3 << 4)
+#define MC13783_REG_REGULATORSETTING0_VGENVSEL_M	(7 << 6)
+#define MC13783_REG_REGULATORSETTING0_VRFDIGVSEL_M	(3 << 9)
+#define MC13783_REG_REGULATORSETTING0_VRFREFVSEL_M	(3 << 11)
+#define MC13783_REG_REGULATORSETTING0_VRFCPVSEL_M	(1 << 13)
+#define MC13783_REG_REGULATORSETTING0_VSIMVSEL_M	(1 << 14)
+#define MC13783_REG_REGULATORSETTING0_VESIMVSEL_M	(1 << 15)
+#define MC13783_REG_REGULATORSETTING0_VCAMVSEL_M	(7 << 16)
+
+#define MC13783_REG_REGULATORSETTING1		31
+#define MC13783_REG_REGULATORSETTING1_VVIBVSEL		0
+#define MC13783_REG_REGULATORSETTING1_VRF1VSEL		2
+#define MC13783_REG_REGULATORSETTING1_VRF2VSEL		4
+#define MC13783_REG_REGULATORSETTING1_VMMC1VSEL		6
+#define MC13783_REG_REGULATORSETTING1_VMMC2VSEL		9
+
+#define MC13783_REG_REGULATORSETTING1_VVIBVSEL_M	(3 << 0)
+#define MC13783_REG_REGULATORSETTING1_VRF1VSEL_M	(3 << 2)
+#define MC13783_REG_REGULATORSETTING1_VRF2VSEL_M	(3 << 4)
+#define MC13783_REG_REGULATORSETTING1_VMMC1VSEL_M	(7 << 6)
+#define MC13783_REG_REGULATORSETTING1_VMMC2VSEL_M	(7 << 9)
 
 #define MC13783_REG_REGULATORMODE0		32
 #define MC13783_REG_REGULATORMODE0_VAUDIOEN		(1 << 0)
@@ -48,19 +82,107 @@
 #define MC13783_REG_POWERMISC_GPO2EN			(1 << 8)
 #define MC13783_REG_POWERMISC_GPO3EN			(1 << 10)
 #define MC13783_REG_POWERMISC_GPO4EN			(1 << 12)
+#define MC13783_REG_POWERMISC_PWGT1SPIEN		(1 << 15)
+#define MC13783_REG_POWERMISC_PWGT2SPIEN		(1 << 16)
+
+#define MC13783_REG_POWERMISC_PWGTSPI_M			(3 << 15)
+
 
 struct mc13783_regulator {
 	struct regulator_desc desc;
 	int reg;
 	int enable_bit;
+	int vsel_reg;
+	int vsel_shift;
+	int vsel_mask;
+	int const *voltages;
+};
+
+/* Voltage Values */
+static const int const mc13783_sw3_val[] = {
+	5000000, 5000000, 5000000, 5500000,
+};
+
+static const int const mc13783_vaudio_val[] = {
+	2775000,
+};
+
+static const int const mc13783_viohi_val[] = {
+	2775000,
+};
+
+static const int const mc13783_violo_val[] = {
+	1200000, 1300000, 1500000, 1800000,
+};
+
+static const int const mc13783_vdig_val[] = {
+	1200000, 1300000, 1500000, 1800000,
+};
+
+static const int const mc13783_vgen_val[] = {
+	1200000, 1300000, 1500000, 1800000,
+	1100000, 2000000, 2775000, 2400000,
+};
+
+static const int const mc13783_vrfdig_val[] = {
+	1200000, 1500000, 1800000, 1875000,
+};
+
+static const int const mc13783_vrfref_val[] = {
+	2475000, 2600000, 2700000, 2775000,
+};
+
+static const int const mc13783_vrfcp_val[] = {
+	2700000, 2775000,
+};
+
+static const int const mc13783_vsim_val[] = {
+	1800000, 2900000, 3000000,
+};
+
+static const int const mc13783_vesim_val[] = {
+	1800000, 2900000,
+};
+
+static const int const mc13783_vcam_val[] = {
+	1500000, 1800000, 2500000, 2550000,
+	2600000, 2750000, 2800000, 3000000,
+};
+
+static const int const mc13783_vrfbg_val[] = {
+	1250000,
+};
+
+static const int const mc13783_vvib_val[] = {
+	1300000, 1800000, 2000000, 3000000,
+};
+
+static const int const mc13783_vmmc_val[] = {
+	1600000, 1800000, 2000000, 2600000,
+	2700000, 2800000, 2900000, 3000000,
+};
+
+static const int const mc13783_vrf_val[] = {
+	1500000, 1875000, 2700000, 2775000,
+};
+
+static const int const mc13783_gpo_val[] = {
+	3100000,
+};
+
+static const int const mc13783_pwgtdrv_val[] = {
+	5500000,
 };
 
 static struct regulator_ops mc13783_regulator_ops;
+static struct regulator_ops mc13783_fixed_regulator_ops;
+static struct regulator_ops mc13783_gpo_regulator_ops;
 
-#define MC13783_DEFINE(prefix, _name, _reg)				\
+#define MC13783_DEFINE(prefix, _name, _reg, _vsel_reg, _voltages)	\
 	[MC13783_ ## prefix ## _ ## _name] = {				\
 		.desc = {						\
 			.name = #prefix "_" #_name,			\
+			.n_voltages = ARRAY_SIZE(_voltages),		\
 			.ops = &mc13783_regulator_ops,			\
 			.type = REGULATOR_VOLTAGE,			\
 			.id = MC13783_ ## prefix ## _ ## _name,		\
@@ -68,40 +190,92 @@
 		},							\
 		.reg = MC13783_REG_ ## _reg,				\
 		.enable_bit = MC13783_REG_ ## _reg ## _ ## _name ## EN,	\
+		.vsel_reg = MC13783_REG_ ## _vsel_reg,			\
+		.vsel_shift = MC13783_REG_ ## _vsel_reg ## _ ## _name ## VSEL,\
+		.vsel_mask = MC13783_REG_ ## _vsel_reg ## _ ## _name ## VSEL_M,\
+		.voltages =  _voltages,					\
 	}
 
-#define MC13783_DEFINE_SW(_name, _reg) MC13783_DEFINE(SW, _name, _reg)
-#define MC13783_DEFINE_REGU(_name, _reg) MC13783_DEFINE(REGU, _name, _reg)
+#define MC13783_FIXED_DEFINE(prefix, _name, _reg, _voltages)		\
+	[MC13783_ ## prefix ## _ ## _name] = {				\
+		.desc = {						\
+			.name = #prefix "_" #_name,			\
+			.n_voltages = ARRAY_SIZE(_voltages),		\
+			.ops = &mc13783_fixed_regulator_ops,		\
+			.type = REGULATOR_VOLTAGE,			\
+			.id = MC13783_ ## prefix ## _ ## _name,		\
+			.owner = THIS_MODULE,				\
+		},							\
+		.reg = MC13783_REG_ ## _reg,				\
+		.enable_bit = MC13783_REG_ ## _reg ## _ ## _name ## EN,	\
+		.voltages =  _voltages,					\
+	}
+
+#define MC13783_GPO_DEFINE(prefix, _name, _reg,  _voltages)		\
+	[MC13783_ ## prefix ## _ ## _name] = {				\
+		.desc = {						\
+			.name = #prefix "_" #_name,			\
+			.n_voltages = ARRAY_SIZE(_voltages),		\
+			.ops = &mc13783_gpo_regulator_ops,		\
+			.type = REGULATOR_VOLTAGE,			\
+			.id = MC13783_ ## prefix ## _ ## _name,		\
+			.owner = THIS_MODULE,				\
+		},							\
+		.reg = MC13783_REG_ ## _reg,				\
+		.enable_bit = MC13783_REG_ ## _reg ## _ ## _name ## EN,	\
+		.voltages =  _voltages,					\
+	}
+
+#define MC13783_DEFINE_SW(_name, _reg, _vsel_reg, _voltages)		\
+	MC13783_DEFINE(SW, _name, _reg, _vsel_reg, _voltages)
+#define MC13783_DEFINE_REGU(_name, _reg, _vsel_reg, _voltages)		\
+	MC13783_DEFINE(REGU, _name, _reg, _vsel_reg, _voltages)
 
 static struct mc13783_regulator mc13783_regulators[] = {
-	MC13783_DEFINE_SW(SW3, SWITCHERS5),
-	MC13783_DEFINE_SW(PLL, SWITCHERS4),
+	MC13783_DEFINE_SW(SW3, SWITCHERS5, SWITCHERS5, mc13783_sw3_val),
 
-	MC13783_DEFINE_REGU(VAUDIO, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VIOHI, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VIOLO, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VDIG, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VGEN, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VRFDIG, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VRFREF, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VRFCP, REGULATORMODE0),
-	MC13783_DEFINE_REGU(VSIM, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VESIM, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VCAM, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VRFBG, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VVIB, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VRF1, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VRF2, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VMMC1, REGULATORMODE1),
-	MC13783_DEFINE_REGU(VMMC2, REGULATORMODE1),
-	MC13783_DEFINE_REGU(GPO1, POWERMISC),
-	MC13783_DEFINE_REGU(GPO2, POWERMISC),
-	MC13783_DEFINE_REGU(GPO3, POWERMISC),
-	MC13783_DEFINE_REGU(GPO4, POWERMISC),
+	MC13783_FIXED_DEFINE(REGU, VAUDIO, REGULATORMODE0, mc13783_vaudio_val),
+	MC13783_FIXED_DEFINE(REGU, VIOHI, REGULATORMODE0, mc13783_viohi_val),
+	MC13783_DEFINE_REGU(VIOLO, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_violo_val),
+	MC13783_DEFINE_REGU(VDIG, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_vdig_val),
+	MC13783_DEFINE_REGU(VGEN, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_vgen_val),
+	MC13783_DEFINE_REGU(VRFDIG, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_vrfdig_val),
+	MC13783_DEFINE_REGU(VRFREF, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_vrfref_val),
+	MC13783_DEFINE_REGU(VRFCP, REGULATORMODE0, REGULATORSETTING0,	\
+			    mc13783_vrfcp_val),
+	MC13783_DEFINE_REGU(VSIM, REGULATORMODE1, REGULATORSETTING0,	\
+			    mc13783_vsim_val),
+	MC13783_DEFINE_REGU(VESIM, REGULATORMODE1, REGULATORSETTING0,	\
+			    mc13783_vesim_val),
+	MC13783_DEFINE_REGU(VCAM, REGULATORMODE1, REGULATORSETTING0,	\
+			    mc13783_vcam_val),
+	MC13783_FIXED_DEFINE(REGU, VRFBG, REGULATORMODE1, mc13783_vrfbg_val),
+	MC13783_DEFINE_REGU(VVIB, REGULATORMODE1, REGULATORSETTING1,	\
+			    mc13783_vvib_val),
+	MC13783_DEFINE_REGU(VRF1, REGULATORMODE1, REGULATORSETTING1,	\
+			    mc13783_vrf_val),
+	MC13783_DEFINE_REGU(VRF2, REGULATORMODE1, REGULATORSETTING1,	\
+			    mc13783_vrf_val),
+	MC13783_DEFINE_REGU(VMMC1, REGULATORMODE1, REGULATORSETTING1,	\
+			    mc13783_vmmc_val),
+	MC13783_DEFINE_REGU(VMMC2, REGULATORMODE1, REGULATORSETTING1,	\
+			    mc13783_vmmc_val),
+	MC13783_GPO_DEFINE(REGU, GPO1, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO2, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO3, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, GPO4, POWERMISC, mc13783_gpo_val),
+	MC13783_GPO_DEFINE(REGU, PWGT1SPI, POWERMISC, mc13783_pwgtdrv_val),
+	MC13783_GPO_DEFINE(REGU, PWGT2SPI, POWERMISC, mc13783_pwgtdrv_val),
 };
 
 struct mc13783_regulator_priv {
 	struct mc13783 *mc13783;
+	u32 powermisc_pwgt_state;
 	struct regulator_dev *regulators[];
 };
 
@@ -154,10 +328,241 @@
 	return (val & mc13783_regulators[id].enable_bit) != 0;
 }
 
+static int mc13783_regulator_list_voltage(struct regulator_dev *rdev,
+						unsigned selector)
+{
+	int id = rdev_get_id(rdev);
+
+	if (selector >= mc13783_regulators[id].desc.n_voltages)
+		return -EINVAL;
+
+	return mc13783_regulators[id].voltages[selector];
+}
+
+static int mc13783_get_best_voltage_index(struct regulator_dev *rdev,
+						int min_uV, int max_uV)
+{
+	int reg_id = rdev_get_id(rdev);
+	int i;
+	int bestmatch;
+	int bestindex;
+
+	/*
+	 * Locate the minimum voltage fitting the criteria on
+	 * this regulator. The switchable voltages are not
+	 * in strict falling order so we need to check them
+	 * all for the best match.
+	 */
+	bestmatch = INT_MAX;
+	bestindex = -1;
+	for (i = 0; i < mc13783_regulators[reg_id].desc.n_voltages; i++) {
+		if (mc13783_regulators[reg_id].voltages[i] >= min_uV &&
+		    mc13783_regulators[reg_id].voltages[i] < bestmatch) {
+			bestmatch = mc13783_regulators[reg_id].voltages[i];
+			bestindex = i;
+		}
+	}
+
+	if (bestindex < 0 || bestmatch > max_uV) {
+		dev_warn(&rdev->dev, "no possible value for %d<=x<=%d uV\n",
+				min_uV, max_uV);
+		return -EINVAL;
+	}
+	return bestindex;
+}
+
+static int mc13783_regulator_set_voltage(struct regulator_dev *rdev,
+						int min_uV, int max_uV)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int value, id = rdev_get_id(rdev);
+	int ret;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d min_uV: %d max_uV: %d\n",
+		__func__, id, min_uV, max_uV);
+
+	/* Find the best index */
+	value = mc13783_get_best_voltage_index(rdev, min_uV, max_uV);
+	dev_dbg(rdev_get_dev(rdev), "%s best value: %d \n", __func__, value);
+	if (value < 0)
+		return value;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_reg_rmw(priv->mc13783, mc13783_regulators[id].vsel_reg,
+			mc13783_regulators[id].vsel_mask,
+			value << mc13783_regulators[id].vsel_shift);
+	mc13783_unlock(priv->mc13783);
+
+	return ret;
+}
+
+static int mc13783_regulator_get_voltage(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int ret, id = rdev_get_id(rdev);
+	unsigned int val;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_reg_read(priv->mc13783,
+				mc13783_regulators[id].vsel_reg, &val);
+	mc13783_unlock(priv->mc13783);
+
+	if (ret)
+		return ret;
+
+	val = (val & mc13783_regulators[id].vsel_mask)
+		>> mc13783_regulators[id].vsel_shift;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val);
+
+	BUG_ON(val < 0 || val > mc13783_regulators[id].desc.n_voltages);
+
+	return mc13783_regulators[id].voltages[val];
+}
+
 static struct regulator_ops mc13783_regulator_ops = {
 	.enable = mc13783_regulator_enable,
 	.disable = mc13783_regulator_disable,
 	.is_enabled = mc13783_regulator_is_enabled,
+	.list_voltage = mc13783_regulator_list_voltage,
+	.set_voltage = mc13783_regulator_set_voltage,
+	.get_voltage = mc13783_regulator_get_voltage,
+};
+
+static int mc13783_fixed_regulator_set_voltage(struct regulator_dev *rdev,
+						int min_uV, int max_uV)
+{
+	int id = rdev_get_id(rdev);
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d min_uV: %d max_uV: %d\n",
+		__func__, id, min_uV, max_uV);
+
+	if (min_uV > mc13783_regulators[id].voltages[0] &&
+	    max_uV < mc13783_regulators[id].voltages[0])
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static int mc13783_fixed_regulator_get_voltage(struct regulator_dev *rdev)
+{
+	int id = rdev_get_id(rdev);
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	return mc13783_regulators[id].voltages[0];
+}
+
+static struct regulator_ops mc13783_fixed_regulator_ops = {
+	.enable = mc13783_regulator_enable,
+	.disable = mc13783_regulator_disable,
+	.is_enabled = mc13783_regulator_is_enabled,
+	.list_voltage = mc13783_regulator_list_voltage,
+	.set_voltage = mc13783_fixed_regulator_set_voltage,
+	.get_voltage = mc13783_fixed_regulator_get_voltage,
+};
+
+int mc13783_powermisc_rmw(struct mc13783_regulator_priv *priv, u32 mask,
+									u32 val)
+{
+	struct mc13783 *mc13783 = priv->mc13783;
+	int ret;
+	u32 valread;
+
+	BUG_ON(val & ~mask);
+
+	ret = mc13783_reg_read(mc13783, MC13783_REG_POWERMISC, &valread);
+	if (ret)
+		return ret;
+
+	/* Update the stored state for Power Gates. */
+	priv->powermisc_pwgt_state =
+				(priv->powermisc_pwgt_state & ~mask) | val;
+	priv->powermisc_pwgt_state &= MC13783_REG_POWERMISC_PWGTSPI_M;
+
+	/* Construct the new register value */
+	valread = (valread & ~mask) | val;
+	/* Overwrite the PWGTxEN with the stored version */
+	valread = (valread & ~MC13783_REG_POWERMISC_PWGTSPI_M) |
+						priv->powermisc_pwgt_state;
+
+	return mc13783_reg_write(mc13783, MC13783_REG_POWERMISC, valread);
+}
+
+static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	int ret;
+	u32 en_val = mc13783_regulators[id].enable_bit;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	/* Power Gate enable value is 0 */
+	if (id == MC13783_REGU_PWGT1SPI ||
+	    id == MC13783_REGU_PWGT2SPI)
+		en_val = 0;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit,
+					en_val);
+	mc13783_unlock(priv->mc13783);
+
+	return ret;
+}
+
+static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int id = rdev_get_id(rdev);
+	int ret;
+	u32 dis_val = 0;
+
+	dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id);
+
+	/* Power Gate disable value is 1 */
+	if (id == MC13783_REGU_PWGT1SPI ||
+	    id == MC13783_REGU_PWGT2SPI)
+		dis_val = mc13783_regulators[id].enable_bit;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit,
+					dis_val);
+	mc13783_unlock(priv->mc13783);
+
+	return ret;
+}
+
+static int mc13783_gpo_regulator_is_enabled(struct regulator_dev *rdev)
+{
+	struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev);
+	int ret, id = rdev_get_id(rdev);
+	unsigned int val;
+
+	mc13783_lock(priv->mc13783);
+	ret = mc13783_reg_read(priv->mc13783, mc13783_regulators[id].reg, &val);
+	mc13783_unlock(priv->mc13783);
+
+	if (ret)
+		return ret;
+
+	/* Power Gates state is stored in powermisc_pwgt_state
+	 * where the meaning of bits is negated */
+	val = (val & ~MC13783_REG_POWERMISC_PWGTSPI_M) |
+	      (priv->powermisc_pwgt_state ^ MC13783_REG_POWERMISC_PWGTSPI_M);
+
+	return (val & mc13783_regulators[id].enable_bit) != 0;
+}
+
+static struct regulator_ops mc13783_gpo_regulator_ops = {
+	.enable = mc13783_gpo_regulator_enable,
+	.disable = mc13783_gpo_regulator_disable,
+	.is_enabled = mc13783_gpo_regulator_is_enabled,
+	.list_voltage = mc13783_regulator_list_voltage,
+	.set_voltage = mc13783_fixed_regulator_set_voltage,
+	.get_voltage = mc13783_fixed_regulator_get_voltage,
 };
 
 static int __devinit mc13783_regulator_probe(struct platform_device *pdev)
diff --git a/drivers/regulator/pcap-regulator.c b/drivers/regulator/pcap-regulator.c
index 33d7d89..29d0566 100644
--- a/drivers/regulator/pcap-regulator.c
+++ b/drivers/regulator/pcap-regulator.c
@@ -288,16 +288,18 @@
 	struct regulator_dev *rdev = platform_get_drvdata(pdev);
 
 	regulator_unregister(rdev);
+	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
 
 static struct platform_driver pcap_regulator_driver = {
 	.driver = {
-		.name = "pcap-regulator",
+		.name	= "pcap-regulator",
+		.owner	= THIS_MODULE,
 	},
-	.probe = pcap_regulator_probe,
-	.remove = __devexit_p(pcap_regulator_remove),
+	.probe	= pcap_regulator_probe,
+	.remove	= __devexit_p(pcap_regulator_remove),
 };
 
 static int __init pcap_regulator_init(void)
diff --git a/drivers/regulator/tps65023-regulator.c b/drivers/regulator/tps65023-regulator.c
index 07fda0a..1f18354 100644
--- a/drivers/regulator/tps65023-regulator.c
+++ b/drivers/regulator/tps65023-regulator.c
@@ -457,8 +457,8 @@
 	.list_voltage = tps65023_ldo_list_voltage,
 };
 
-static
-int tps_65023_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int __devinit tps_65023_probe(struct i2c_client *client,
+				     const struct i2c_device_id *id)
 {
 	static int desc_id;
 	const struct tps_info *info = (void *)id->driver_data;
@@ -466,6 +466,7 @@
 	struct regulator_dev *rdev;
 	struct tps_pmic *tps;
 	int i;
+	int error;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -EIO;
@@ -475,7 +476,6 @@
 	 * coming from the board-evm file.
 	 */
 	init_data = client->dev.platform_data;
-
 	if (!init_data)
 		return -EIO;
 
@@ -502,21 +502,12 @@
 
 		/* Register the regulators */
 		rdev = regulator_register(&tps->desc[i], &client->dev,
-								init_data, tps);
+					  init_data, tps);
 		if (IS_ERR(rdev)) {
 			dev_err(&client->dev, "failed to register %s\n",
 				id->name);
-
-			/* Unregister */
-			while (i)
-				regulator_unregister(tps->rdev[--i]);
-
-			tps->client = NULL;
-
-			/* clear the client data in i2c */
-			i2c_set_clientdata(client, NULL);
-			kfree(tps);
-			return PTR_ERR(rdev);
+			error = PTR_ERR(rdev);
+			goto fail;
 		}
 
 		/* Save regulator for cleanup */
@@ -526,6 +517,13 @@
 	i2c_set_clientdata(client, tps);
 
 	return 0;
+
+ fail:
+	while (--i >= 0)
+		regulator_unregister(tps->rdev[i]);
+
+	kfree(tps);
+	return error;
 }
 
 /**
@@ -539,13 +537,12 @@
 	struct tps_pmic *tps = i2c_get_clientdata(client);
 	int i;
 
+	/* clear the client data in i2c */
+	i2c_set_clientdata(client, NULL);
+
 	for (i = 0; i < TPS65023_NUM_REGULATOR; i++)
 		regulator_unregister(tps->rdev[i]);
 
-	tps->client = NULL;
-
-	/* clear the client data in i2c */
-	i2c_set_clientdata(client, NULL);
 	kfree(tps);
 
 	return 0;
diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
index f8a6dfb..c2a9539 100644
--- a/drivers/regulator/tps6507x-regulator.c
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -538,8 +538,8 @@
 	.list_voltage = tps6507x_ldo_list_voltage,
 };
 
-static
-int tps_6507x_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int __devinit tps_6507x_probe(struct i2c_client *client,
+				     const struct i2c_device_id *id)
 {
 	static int desc_id;
 	const struct tps_info *info = (void *)id->driver_data;
@@ -547,6 +547,7 @@
 	struct regulator_dev *rdev;
 	struct tps_pmic *tps;
 	int i;
+	int error;
 
 	if (!i2c_check_functionality(client->adapter,
 				I2C_FUNC_SMBUS_BYTE_DATA))
@@ -557,7 +558,6 @@
 	 * coming from the board-evm file.
 	 */
 	init_data = client->dev.platform_data;
-
 	if (!init_data)
 		return -EIO;
 
@@ -586,18 +586,8 @@
 		if (IS_ERR(rdev)) {
 			dev_err(&client->dev, "failed to register %s\n",
 				id->name);
-
-			/* Unregister */
-			while (i)
-				regulator_unregister(tps->rdev[--i]);
-
-			tps->client = NULL;
-
-			/* clear the client data in i2c */
-			i2c_set_clientdata(client, NULL);
-
-			kfree(tps);
-			return PTR_ERR(rdev);
+			error = PTR_ERR(rdev);
+			goto fail;
 		}
 
 		/* Save regulator for cleanup */
@@ -607,6 +597,13 @@
 	i2c_set_clientdata(client, tps);
 
 	return 0;
+
+fail:
+	while (--i >= 0)
+		regulator_unregister(tps->rdev[i]);
+
+	kfree(tps);
+	return error;
 }
 
 /**
@@ -620,13 +617,12 @@
 	struct tps_pmic *tps = i2c_get_clientdata(client);
 	int i;
 
+	/* clear the client data in i2c */
+	i2c_set_clientdata(client, NULL);
+
 	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++)
 		regulator_unregister(tps->rdev[i]);
 
-	tps->client = NULL;
-
-	/* clear the client data in i2c */
-	i2c_set_clientdata(client, NULL);
 	kfree(tps);
 
 	return 0;
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 7e67485..9729d76 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -519,19 +519,19 @@
 	/* 6030 REG with base as PMC Slave Misc : 0x0030 */
 	/* Turnon-delay and remap configuration values for 6030 are not
 	   verified since the specification is not public */
-	TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1, 0, 0x08),
-	TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 2, 0, 0x08),
-	TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 3, 0, 0x08),
-	TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 4, 0, 0x08),
-	TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 5, 0, 0x08),
-	TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 7, 0, 0x08),
-	TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0, 0x08),
-	TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0, 0x08),
-	TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0, 0x08),
-	TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0, 0x08)
+	TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1, 0, 0x21),
+	TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 2, 0, 0x21),
+	TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 3, 0, 0x21),
+	TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 4, 0, 0x21),
+	TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 5, 0, 0x21),
+	TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 7, 0, 0x21),
+	TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0, 0x21),
+	TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0, 0x21),
+	TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0, 0x21),
+	TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0, 0x21)
 };
 
-static int twlreg_probe(struct platform_device *pdev)
+static int __devinit twlreg_probe(struct platform_device *pdev)
 {
 	int				i;
 	struct twlreg_info		*info;
diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index addc032..d96ceca 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -19,7 +19,7 @@
 struct virtual_consumer_data {
 	struct mutex lock;
 	struct regulator *regulator;
-	int enabled;
+	bool enabled;
 	int min_uV;
 	int max_uV;
 	int min_uA;
@@ -49,7 +49,7 @@
 		dev_dbg(dev, "Enabling regulator\n");
 		ret = regulator_enable(data->regulator);
 		if (ret == 0)
-			data->enabled = 1;
+			data->enabled = true;
 		else
 			dev_err(dev, "regulator_enable() failed: %d\n",
 				ret);
@@ -59,7 +59,7 @@
 		dev_dbg(dev, "Disabling regulator\n");
 		ret = regulator_disable(data->regulator);
 		if (ret == 0)
-			data->enabled = 0;
+			data->enabled = false;
 		else
 			dev_err(dev, "regulator_disable() failed: %d\n",
 				ret);
@@ -89,7 +89,7 @@
 		dev_dbg(dev, "Enabling regulator\n");
 		ret = regulator_enable(data->regulator);
 		if (ret == 0)
-			data->enabled = 1;
+			data->enabled = true;
 		else
 			dev_err(dev, "regulator_enable() failed: %d\n",
 				ret);
@@ -99,7 +99,7 @@
 		dev_dbg(dev, "Disabling regulator\n");
 		ret = regulator_disable(data->regulator);
 		if (ret == 0)
-			data->enabled = 0;
+			data->enabled = false;
 		else
 			dev_err(dev, "regulator_disable() failed: %d\n",
 				ret);
@@ -270,24 +270,28 @@
 static DEVICE_ATTR(max_microamps, 0666, show_max_uA, set_max_uA);
 static DEVICE_ATTR(mode, 0666, show_mode, set_mode);
 
-static struct device_attribute *attributes[] = {
-	&dev_attr_min_microvolts,
-	&dev_attr_max_microvolts,
-	&dev_attr_min_microamps,
-	&dev_attr_max_microamps,
-	&dev_attr_mode,
+static struct attribute *regulator_virtual_attributes[] = {
+	&dev_attr_min_microvolts.attr,
+	&dev_attr_max_microvolts.attr,
+	&dev_attr_min_microamps.attr,
+	&dev_attr_max_microamps.attr,
+	&dev_attr_mode.attr,
+	NULL
 };
 
-static int regulator_virtual_consumer_probe(struct platform_device *pdev)
+static const struct attribute_group regulator_virtual_attr_group = {
+	.attrs	= regulator_virtual_attributes,
+};
+
+static int __devinit regulator_virtual_probe(struct platform_device *pdev)
 {
 	char *reg_id = pdev->dev.platform_data;
 	struct virtual_consumer_data *drvdata;
-	int ret, i;
+	int ret;
 
 	drvdata = kzalloc(sizeof(struct virtual_consumer_data), GFP_KERNEL);
-	if (drvdata == NULL) {
+	if (drvdata == NULL)
 		return -ENOMEM;
-	}
 
 	mutex_init(&drvdata->lock);
 
@@ -299,13 +303,12 @@
 		goto err;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(attributes); i++) {
-		ret = device_create_file(&pdev->dev, attributes[i]);
-		if (ret != 0) {
-			dev_err(&pdev->dev, "Failed to create attr %d: %d\n",
-				i, ret);
-			goto err_regulator;
-		}
+	ret = sysfs_create_group(&pdev->dev.kobj,
+				 &regulator_virtual_attr_group);
+	if (ret != 0) {
+		dev_err(&pdev->dev,
+			"Failed to create attribute group: %d\n", ret);
+		goto err_regulator;
 	}
 
 	drvdata->mode = regulator_get_mode(drvdata->regulator);
@@ -317,37 +320,36 @@
 err_regulator:
 	regulator_put(drvdata->regulator);
 err:
-	for (i = 0; i < ARRAY_SIZE(attributes); i++)
-		device_remove_file(&pdev->dev, attributes[i]);
 	kfree(drvdata);
 	return ret;
 }
 
-static int regulator_virtual_consumer_remove(struct platform_device *pdev)
+static int __devexit regulator_virtual_remove(struct platform_device *pdev)
 {
 	struct virtual_consumer_data *drvdata = platform_get_drvdata(pdev);
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(attributes); i++)
-		device_remove_file(&pdev->dev, attributes[i]);
+	sysfs_remove_group(&pdev->dev.kobj, &regulator_virtual_attr_group);
+
 	if (drvdata->enabled)
 		regulator_disable(drvdata->regulator);
 	regulator_put(drvdata->regulator);
 
 	kfree(drvdata);
 
+	platform_set_drvdata(pdev, NULL);
+
 	return 0;
 }
 
 static struct platform_driver regulator_virtual_consumer_driver = {
-	.probe		= regulator_virtual_consumer_probe,
-	.remove		= regulator_virtual_consumer_remove,
+	.probe		= regulator_virtual_probe,
+	.remove		= __devexit_p(regulator_virtual_remove),
 	.driver		= {
 		.name		= "reg-virt-consumer",
+		.owner		= THIS_MODULE,
 	},
 };
 
-
 static int __init regulator_virtual_consumer_init(void)
 {
 	return platform_driver_register(&regulator_virtual_consumer_driver);
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 0a65775..6e18e56 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -600,6 +600,8 @@
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
 	struct wm831x *wm831x = dcdc->wm831x;
 
+	platform_set_drvdata(pdev, NULL);
+
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "HC"), dcdc);
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
@@ -615,6 +617,7 @@
 	.remove = __devexit_p(wm831x_buckv_remove),
 	.driver		= {
 		.name	= "wm831x-buckv",
+		.owner	= THIS_MODULE,
 	},
 };
 
@@ -769,6 +772,8 @@
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
 	struct wm831x *wm831x = dcdc->wm831x;
 
+	platform_set_drvdata(pdev, NULL);
+
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
@@ -781,6 +786,7 @@
 	.remove = __devexit_p(wm831x_buckp_remove),
 	.driver		= {
 		.name	= "wm831x-buckp",
+		.owner	= THIS_MODULE,
 	},
 };
 
@@ -895,6 +901,8 @@
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
 	struct wm831x *wm831x = dcdc->wm831x;
 
+	platform_set_drvdata(pdev, NULL);
+
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), dcdc);
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
@@ -907,6 +915,7 @@
 	.remove = __devexit_p(wm831x_boostp_remove),
 	.driver		= {
 		.name	= "wm831x-boostp",
+		.owner	= THIS_MODULE,
 	},
 };
 
@@ -979,6 +988,8 @@
 {
 	struct wm831x_dcdc *dcdc = platform_get_drvdata(pdev);
 
+	platform_set_drvdata(pdev, NULL);
+
 	regulator_unregister(dcdc->regulator);
 	kfree(dcdc);
 
@@ -990,6 +1001,7 @@
 	.remove = __devexit_p(wm831x_epe_remove),
 	.driver		= {
 		.name	= "wm831x-epe",
+		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c
index 4885700..ca0f6b6 100644
--- a/drivers/regulator/wm831x-isink.c
+++ b/drivers/regulator/wm831x-isink.c
@@ -222,6 +222,8 @@
 	struct wm831x_isink *isink = platform_get_drvdata(pdev);
 	struct wm831x *wm831x = isink->wm831x;
 
+	platform_set_drvdata(pdev, NULL);
+
 	wm831x_free_irq(wm831x, platform_get_irq(pdev, 0), isink);
 
 	regulator_unregister(isink->regulator);
@@ -235,6 +237,7 @@
 	.remove = __devexit_p(wm831x_isink_remove),
 	.driver		= {
 		.name	= "wm831x-isink",
+		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c
index 61e02ac..d2406c1 100644
--- a/drivers/regulator/wm831x-ldo.c
+++ b/drivers/regulator/wm831x-ldo.c
@@ -371,6 +371,8 @@
 	struct wm831x_ldo *ldo = platform_get_drvdata(pdev);
 	struct wm831x *wm831x = ldo->wm831x;
 
+	platform_set_drvdata(pdev, NULL);
+
 	wm831x_free_irq(wm831x, platform_get_irq_byname(pdev, "UV"), ldo);
 	regulator_unregister(ldo->regulator);
 	kfree(ldo);
@@ -383,6 +385,7 @@
 	.remove = __devexit_p(wm831x_gp_ldo_remove),
 	.driver		= {
 		.name	= "wm831x-ldo",
+		.owner	= THIS_MODULE,
 	},
 };
 
@@ -640,6 +643,7 @@
 	.remove = __devexit_p(wm831x_aldo_remove),
 	.driver		= {
 		.name	= "wm831x-aldo",
+		.owner	= THIS_MODULE,
 	},
 };
 
@@ -811,6 +815,7 @@
 	.remove = __devexit_p(wm831x_alive_ldo_remove),
 	.driver		= {
 		.name	= "wm831x-alive-ldo",
+		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index e7b89e7..94227dd 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -290,6 +290,51 @@
 	return -EINVAL;
 }
 
+static int wm8350_isink_enable_time(struct regulator_dev *rdev)
+{
+	struct wm8350 *wm8350 = rdev_get_drvdata(rdev);
+	int isink = rdev_get_id(rdev);
+	int reg;
+
+	switch (isink) {
+	case WM8350_ISINK_A:
+		reg = wm8350_reg_read(wm8350, WM8350_CSA_FLASH_CONTROL);
+		break;
+	case WM8350_ISINK_B:
+		reg = wm8350_reg_read(wm8350, WM8350_CSB_FLASH_CONTROL);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (reg & WM8350_CS1_FLASH_MODE) {
+		switch (reg & WM8350_CS1_ON_RAMP_MASK) {
+		case 0:
+			return 0;
+		case 1:
+			return 1950;
+		case 2:
+			return 3910;
+		case 3:
+			return 7800;
+		}
+	} else {
+		switch (reg & WM8350_CS1_ON_RAMP_MASK) {
+		case 0:
+			return 0;
+		case 1:
+			return 250000;
+		case 2:
+			return 500000;
+		case 3:
+			return 1000000;
+		}
+	}
+
+	return -EINVAL;
+}
+
+
 int wm8350_isink_set_flash(struct wm8350 *wm8350, int isink, u16 mode,
 			   u16 trigger, u16 duration, u16 on_ramp, u16 off_ramp,
 			   u16 drive)
@@ -1221,6 +1266,7 @@
 	.enable = wm8350_isink_enable,
 	.disable = wm8350_isink_disable,
 	.is_enabled = wm8350_isink_is_enabled,
+	.enable_time = wm8350_isink_enable_time,
 };
 
 static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index d9a2c98..924c7eb 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -317,14 +317,17 @@
 
 static int __devinit wm8400_regulator_probe(struct platform_device *pdev)
 {
+	struct wm8400 *wm8400 = container_of(pdev, struct wm8400, regulators[pdev->id]);
 	struct regulator_dev *rdev;
 
 	rdev = regulator_register(&regulators[pdev->id], &pdev->dev,
-		pdev->dev.platform_data, dev_get_drvdata(&pdev->dev));
+				  pdev->dev.platform_data, wm8400);
 
 	if (IS_ERR(rdev))
 		return PTR_ERR(rdev);
 
+	platform_set_drvdata(pdev, rdev);
+
 	return 0;
 }
 
@@ -332,6 +335,7 @@
 {
 	struct regulator_dev *rdev = platform_get_drvdata(pdev);
 
+	platform_set_drvdata(pdev, NULL);
 	regulator_unregister(rdev);
 
 	return 0;
@@ -370,7 +374,6 @@
 	wm8400->regulators[reg].id = reg;
 	wm8400->regulators[reg].dev.parent = dev;
 	wm8400->regulators[reg].dev.platform_data = initdata;
-	dev_set_drvdata(&wm8400->regulators[reg].dev, wm8400);
 
 	return platform_device_register(&wm8400->regulators[reg]);
 }
diff --git a/drivers/regulator/wm8994-regulator.c b/drivers/regulator/wm8994-regulator.c
new file mode 100644
index 0000000..95454a4
--- /dev/null
+++ b/drivers/regulator/wm8994-regulator.c
@@ -0,0 +1,307 @@
+/*
+ * wm8994-regulator.c  --  Regulator driver for the WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/gpio.h>
+
+#include <linux/mfd/wm8994/core.h>
+#include <linux/mfd/wm8994/registers.h>
+#include <linux/mfd/wm8994/pdata.h>
+
+struct wm8994_ldo {
+	int enable;
+	bool is_enabled;
+	struct regulator_dev *regulator;
+	struct wm8994 *wm8994;
+};
+
+#define WM8994_LDO1_MAX_SELECTOR 0x7
+#define WM8994_LDO2_MAX_SELECTOR 0x3
+
+static int wm8994_ldo_enable(struct regulator_dev *rdev)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+
+	/* If we have no soft control assume that the LDO is always enabled. */
+	if (!ldo->enable)
+		return 0;
+
+	gpio_set_value(ldo->enable, 1);
+	ldo->is_enabled = true;
+
+	return 0;
+}
+
+static int wm8994_ldo_disable(struct regulator_dev *rdev)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+
+	/* If we have no soft control assume that the LDO is always enabled. */
+	if (!ldo->enable)
+		return -EINVAL;
+
+	gpio_set_value(ldo->enable, 0);
+	ldo->is_enabled = false;
+
+	return 0;
+}
+
+static int wm8994_ldo_is_enabled(struct regulator_dev *rdev)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+
+	return ldo->is_enabled;
+}
+
+static int wm8994_ldo_enable_time(struct regulator_dev *rdev)
+{
+	/* 3ms is fairly conservative but this shouldn't be too performance
+	 * critical; can be tweaked per-system if required. */
+	return 3000;
+}
+
+static int wm8994_ldo1_list_voltage(struct regulator_dev *rdev,
+				    unsigned int selector)
+{
+	if (selector > WM8994_LDO1_MAX_SELECTOR)
+		return -EINVAL;
+
+	return (selector * 100000) + 2400000;
+}
+
+static int wm8994_ldo1_get_voltage(struct regulator_dev *rdev)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+	int val;
+
+	val = wm8994_reg_read(ldo->wm8994, WM8994_LDO_1);
+	if (val < 0)
+		return val;
+
+	val = (val & WM8994_LDO1_VSEL_MASK) >> WM8994_LDO1_VSEL_SHIFT;
+
+	return wm8994_ldo1_list_voltage(rdev, val);
+}
+
+static int wm8994_ldo1_set_voltage(struct regulator_dev *rdev,
+				   int min_uV, int max_uV)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+	int selector, v;
+
+	selector = (min_uV - 2400000) / 100000;
+	v = wm8994_ldo1_list_voltage(rdev, selector);
+	if (v < 0 || v > max_uV)
+		return -EINVAL;
+
+	selector <<= WM8994_LDO1_VSEL_SHIFT;
+
+	return wm8994_set_bits(ldo->wm8994, WM8994_LDO_1,
+			       WM8994_LDO1_VSEL_MASK, selector);
+}
+
+static struct regulator_ops wm8994_ldo1_ops = {
+	.enable = wm8994_ldo_enable,
+	.disable = wm8994_ldo_disable,
+	.is_enabled = wm8994_ldo_is_enabled,
+	.enable_time = wm8994_ldo_enable_time,
+
+	.list_voltage = wm8994_ldo1_list_voltage,
+	.get_voltage = wm8994_ldo1_get_voltage,
+	.set_voltage = wm8994_ldo1_set_voltage,
+};
+
+static int wm8994_ldo2_list_voltage(struct regulator_dev *rdev,
+				    unsigned int selector)
+{
+	if (selector > WM8994_LDO2_MAX_SELECTOR)
+		return -EINVAL;
+
+	return (selector * 100000) + 900000;
+}
+
+static int wm8994_ldo2_get_voltage(struct regulator_dev *rdev)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+	int val;
+
+	val = wm8994_reg_read(ldo->wm8994, WM8994_LDO_2);
+	if (val < 0)
+		return val;
+
+	val = (val & WM8994_LDO2_VSEL_MASK) >> WM8994_LDO2_VSEL_SHIFT;
+
+	return wm8994_ldo2_list_voltage(rdev, val);
+}
+
+static int wm8994_ldo2_set_voltage(struct regulator_dev *rdev,
+				   int min_uV, int max_uV)
+{
+	struct wm8994_ldo *ldo = rdev_get_drvdata(rdev);
+	int selector, v;
+
+	selector = (min_uV - 900000) / 100000;
+	v = wm8994_ldo2_list_voltage(rdev, selector);
+	if (v < 0 || v > max_uV)
+		return -EINVAL;
+
+	selector <<= WM8994_LDO2_VSEL_SHIFT;
+
+	return wm8994_set_bits(ldo->wm8994, WM8994_LDO_2,
+			       WM8994_LDO2_VSEL_MASK, selector);
+}
+
+static struct regulator_ops wm8994_ldo2_ops = {
+	.enable = wm8994_ldo_enable,
+	.disable = wm8994_ldo_disable,
+	.is_enabled = wm8994_ldo_is_enabled,
+	.enable_time = wm8994_ldo_enable_time,
+
+	.list_voltage = wm8994_ldo2_list_voltage,
+	.get_voltage = wm8994_ldo2_get_voltage,
+	.set_voltage = wm8994_ldo2_set_voltage,
+};
+
+static struct regulator_desc wm8994_ldo_desc[] = {
+	{
+		.name = "LDO1",
+		.id = 1,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = WM8994_LDO1_MAX_SELECTOR + 1,
+		.ops = &wm8994_ldo1_ops,
+		.owner = THIS_MODULE,
+	},
+	{
+		.name = "LDO2",
+		.id = 2,
+		.type = REGULATOR_VOLTAGE,
+		.n_voltages = WM8994_LDO2_MAX_SELECTOR + 1,
+		.ops = &wm8994_ldo2_ops,
+		.owner = THIS_MODULE,
+	},
+};
+
+static __devinit int wm8994_ldo_probe(struct platform_device *pdev)
+{
+	struct wm8994 *wm8994 = dev_get_drvdata(pdev->dev.parent);
+	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
+	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	struct wm8994_ldo *ldo;
+	int ret;
+
+	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
+
+	if (!pdata)
+		return -ENODEV;
+
+	ldo = kzalloc(sizeof(struct wm8994_ldo), GFP_KERNEL);
+	if (ldo == NULL) {
+		dev_err(&pdev->dev, "Unable to allocate private data\n");
+		return -ENOMEM;
+	}
+
+	ldo->wm8994 = wm8994;
+
+	ldo->is_enabled = true;
+
+	if (pdata->ldo[id].enable && gpio_is_valid(pdata->ldo[id].enable)) {
+		ldo->enable = pdata->ldo[id].enable;
+
+		ret = gpio_request(ldo->enable, "WM8994 LDO enable");
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Failed to get enable GPIO: %d\n",
+				ret);
+			goto err;
+		}
+
+		ret = gpio_direction_output(ldo->enable, ldo->is_enabled);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Failed to set GPIO up: %d\n",
+				ret);
+			goto err_gpio;
+		}
+	}
+
+	ldo->regulator = regulator_register(&wm8994_ldo_desc[id], &pdev->dev,
+					     pdata->ldo[id].init_data, ldo);
+	if (IS_ERR(ldo->regulator)) {
+		ret = PTR_ERR(ldo->regulator);
+		dev_err(wm8994->dev, "Failed to register LDO%d: %d\n",
+			id + 1, ret);
+		goto err_gpio;
+	}
+
+	platform_set_drvdata(pdev, ldo);
+
+	return 0;
+
+err_gpio:
+	if (gpio_is_valid(ldo->enable))
+		gpio_free(ldo->enable);
+err:
+	kfree(ldo);
+	return ret;
+}
+
+static __devexit int wm8994_ldo_remove(struct platform_device *pdev)
+{
+	struct wm8994_ldo *ldo = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+
+	regulator_unregister(ldo->regulator);
+	if (gpio_is_valid(ldo->enable))
+		gpio_free(ldo->enable);
+	kfree(ldo);
+
+	return 0;
+}
+
+static struct platform_driver wm8994_ldo_driver = {
+	.probe = wm8994_ldo_probe,
+	.remove = __devexit_p(wm8994_ldo_remove),
+	.driver		= {
+		.name	= "wm8994-ldo",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init wm8994_ldo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&wm8994_ldo_driver);
+	if (ret != 0)
+		pr_err("Failed to register Wm8994 GP LDO driver: %d\n", ret);
+
+	return ret;
+}
+subsys_initcall(wm8994_ldo_init);
+
+static void __exit wm8994_ldo_exit(void)
+{
+	platform_driver_unregister(&wm8994_ldo_driver);
+}
+module_exit(wm8994_ldo_exit);
+
+/* Module information */
+MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>");
+MODULE_DESCRIPTION("WM8994 LDO driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:wm8994-ldo");
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index f69b778..11fc4d5 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -969,7 +969,7 @@
 
 	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-		err = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		err = dquot_transfer(inode, attr);
 		if (err)
 			goto err_out_exit;
 	}
diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index b1935fe..5a3cdd0 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -1050,7 +1050,7 @@
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
-	VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc);
+	VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc);
 }
 
 static int do_verify(struct fsg_common *common)
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index a90dd2d..b49d86e 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1448,7 +1448,7 @@
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
-	VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc);
+	VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc);
 }
 
 static int do_verify(struct fsg_dev *fsg)
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 3681c6a..b0a3fa0 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -3025,6 +3025,20 @@
 	return 0;
 }
 
+static void fbcon_remap_all(int idx)
+{
+	int i;
+	for (i = first_fb_vc; i <= last_fb_vc; i++)
+		set_con2fb_map(i, idx, 0);
+
+	if (con_is_bound(&fb_con)) {
+		printk(KERN_INFO "fbcon: Remapping primary device, "
+		       "fb%i, to tty %i-%i\n", idx,
+		       first_fb_vc + 1, last_fb_vc + 1);
+		info_idx = idx;
+	}
+}
+
 #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
 static void fbcon_select_primary(struct fb_info *info)
 {
@@ -3225,6 +3239,10 @@
 		caps = event->data;
 		fbcon_get_requirement(info, caps);
 		break;
+	case FB_EVENT_REMAP_ALL_CONSOLE:
+		idx = info->node;
+		fbcon_remap_all(idx);
+		break;
 	}
 done:
 	return ret;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 99bbd28..a15b44e 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1513,7 +1513,6 @@
 				       fb_info->fix.id,
 				       registered_fb[i]->fix.id);
 				unregister_framebuffer(registered_fb[i]);
-				break;
 			}
 		}
 	}
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 14d9442..08b2eb1 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -151,7 +151,7 @@
 			if (access == V9FS_ACCESS_SINGLE)
 				return ERR_PTR(-EPERM);
 
-			if (v9fs_extended(v9ses))
+			if (v9fs_proto_dotu(v9ses))
 				uname = NULL;
 			else
 				uname = v9ses->uname;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 7d6c213..6c7f6a2 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -241,7 +241,7 @@
 	list_add(&v9ses->slist, &v9fs_sessionlist);
 	spin_unlock(&v9fs_sessionlist_lock);
 
-	v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER;
+	v9ses->flags = V9FS_PROTO_2000U | V9FS_ACCESS_USER;
 	strcpy(v9ses->uname, V9FS_DEFUSER);
 	strcpy(v9ses->aname, V9FS_DEFANAME);
 	v9ses->uid = ~0;
@@ -262,13 +262,13 @@
 		goto error;
 	}
 
-	if (!v9ses->clnt->dotu)
-		v9ses->flags &= ~V9FS_EXTENDED;
+	if (!p9_is_proto_dotu(v9ses->clnt))
+		v9ses->flags &= ~V9FS_PROTO_2000U;
 
 	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
 	/* for legacy mode, fall back to V9FS_ACCESS_ANY */
-	if (!v9fs_extended(v9ses) &&
+	if (!v9fs_proto_dotu(v9ses) &&
 		((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
 
 		v9ses->flags &= ~V9FS_ACCESS_MASK;
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 019f4cc..79000bf 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -23,7 +23,8 @@
 
 /**
  * enum p9_session_flags - option flags for each 9P session
- * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions
+ * @V9FS_PROTO_2000U: whether or not to use 9P2000.u extensions
+ * @V9FS_PROTO_2010L: whether or not to use 9P2010.l extensions
  * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
  * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
  * @V9FS_ACCESS_ANY: use a single attach for all users
@@ -32,11 +33,12 @@
  * Session flags reflect options selected by users at mount time
  */
 enum p9_session_flags {
-	V9FS_EXTENDED		= 0x01,
-	V9FS_ACCESS_SINGLE	= 0x02,
-	V9FS_ACCESS_USER	= 0x04,
-	V9FS_ACCESS_ANY		= 0x06,
-	V9FS_ACCESS_MASK	= 0x06,
+	V9FS_PROTO_2000U	= 0x01,
+	V9FS_PROTO_2010L	= 0x02,
+	V9FS_ACCESS_SINGLE	= 0x04,
+	V9FS_ACCESS_USER	= 0x08,
+	V9FS_ACCESS_ANY		= 0x0C,
+	V9FS_ACCESS_MASK	= 0x0C,
 };
 
 /* possible values of ->cache */
@@ -121,7 +123,12 @@
 	return (inode->i_sb->s_fs_info);
 }
 
-static inline int v9fs_extended(struct v9fs_session_info *v9ses)
+static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
 {
-	return v9ses->flags & V9FS_EXTENDED;
+	return v9ses->flags & V9FS_PROTO_2000U;
+}
+
+static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
+{
+	return v9ses->flags & V9FS_PROTO_2010L;
 }
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 15cce53..6580aa4 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -135,7 +135,7 @@
 		while (rdir->head < rdir->tail) {
 			err = p9stat_read(rdir->buf + rdir->head,
 						buflen - rdir->head, &st,
-						fid->clnt->dotu);
+						fid->clnt->proto_version);
 			if (err) {
 				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
 				err = -EIO;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 74a0461..3612268 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -61,7 +61,7 @@
 
 	P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
 	v9ses = v9fs_inode2v9ses(inode);
-	omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses));
+	omode = v9fs_uflags2omode(file->f_flags, v9fs_proto_dotu(v9ses));
 	fid = file->private_data;
 	if (!fid) {
 		fid = v9fs_fid_clone(file->f_path.dentry);
@@ -77,7 +77,7 @@
 			i_size_write(inode, 0);
 			inode->i_blocks = 0;
 		}
-		if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
+		if ((file->f_flags & O_APPEND) && (!v9fs_proto_dotu(v9ses)))
 			generic_file_llseek(file, 0, SEEK_END);
 	}
 
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a407fa3..5fe45d6 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -60,7 +60,7 @@
 	res = mode & 0777;
 	if (S_ISDIR(mode))
 		res |= P9_DMDIR;
-	if (v9fs_extended(v9ses)) {
+	if (v9fs_proto_dotu(v9ses)) {
 		if (S_ISLNK(mode))
 			res |= P9_DMSYMLINK;
 		if (v9ses->nodev == 0) {
@@ -102,21 +102,21 @@
 
 	if ((mode & P9_DMDIR) == P9_DMDIR)
 		res |= S_IFDIR;
-	else if ((mode & P9_DMSYMLINK) && (v9fs_extended(v9ses)))
+	else if ((mode & P9_DMSYMLINK) && (v9fs_proto_dotu(v9ses)))
 		res |= S_IFLNK;
-	else if ((mode & P9_DMSOCKET) && (v9fs_extended(v9ses))
+	else if ((mode & P9_DMSOCKET) && (v9fs_proto_dotu(v9ses))
 		 && (v9ses->nodev == 0))
 		res |= S_IFSOCK;
-	else if ((mode & P9_DMNAMEDPIPE) && (v9fs_extended(v9ses))
+	else if ((mode & P9_DMNAMEDPIPE) && (v9fs_proto_dotu(v9ses))
 		 && (v9ses->nodev == 0))
 		res |= S_IFIFO;
-	else if ((mode & P9_DMDEVICE) && (v9fs_extended(v9ses))
+	else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
 		 && (v9ses->nodev == 0))
 		res |= S_IFBLK;
 	else
 		res |= S_IFREG;
 
-	if (v9fs_extended(v9ses)) {
+	if (v9fs_proto_dotu(v9ses)) {
 		if ((mode & P9_DMSETUID) == P9_DMSETUID)
 			res |= S_ISUID;
 
@@ -265,7 +265,7 @@
 	case S_IFBLK:
 	case S_IFCHR:
 	case S_IFSOCK:
-		if (!v9fs_extended(v9ses)) {
+		if (!v9fs_proto_dotu(v9ses)) {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				   "special files without extended mode\n");
 			err = -EINVAL;
@@ -278,7 +278,7 @@
 		inode->i_fop = &v9fs_file_operations;
 		break;
 	case S_IFLNK:
-		if (!v9fs_extended(v9ses)) {
+		if (!v9fs_proto_dotu(v9ses)) {
 			P9_DPRINTK(P9_DEBUG_ERROR,
 				   "extended modes used w/o 9P2000.u\n");
 			err = -EINVAL;
@@ -288,7 +288,7 @@
 		break;
 	case S_IFDIR:
 		inc_nlink(inode);
-		if (v9fs_extended(v9ses))
+		if (v9fs_proto_dotu(v9ses))
 			inode->i_op = &v9fs_dir_inode_operations_ext;
 		else
 			inode->i_op = &v9fs_dir_inode_operations;
@@ -575,7 +575,8 @@
 		flags = O_RDWR;
 
 	fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
-				v9fs_uflags2omode(flags, v9fs_extended(v9ses)));
+				v9fs_uflags2omode(flags,
+						v9fs_proto_dotu(v9ses)));
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
 		fid = NULL;
@@ -858,7 +859,7 @@
 	if (iattr->ia_valid & ATTR_SIZE)
 		wstat.length = iattr->ia_size;
 
-	if (v9fs_extended(v9ses)) {
+	if (v9fs_proto_dotu(v9ses)) {
 		if (iattr->ia_valid & ATTR_UID)
 			wstat.n_uid = iattr->ia_uid;
 
@@ -886,6 +887,8 @@
 	struct super_block *sb)
 {
 	char ext[32];
+	char tag_name[14];
+	unsigned int i_nlink;
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
 	inode->i_nlink = 1;
@@ -897,11 +900,26 @@
 	inode->i_uid = v9ses->dfltuid;
 	inode->i_gid = v9ses->dfltgid;
 
-	if (v9fs_extended(v9ses)) {
+	if (v9fs_proto_dotu(v9ses)) {
 		inode->i_uid = stat->n_uid;
 		inode->i_gid = stat->n_gid;
 	}
-
+	if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
+		if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
+			/*
+			 * Hadlink support got added later to
+			 * to the .u extension. So there can be
+			 * server out there that doesn't support
+			 * this even with .u extension. So check
+			 * for non NULL stat->extension
+			 */
+			strncpy(ext, stat->extension, sizeof(ext));
+			/* HARDLINKCOUNT %u */
+			sscanf(ext, "%13s %u", tag_name, &i_nlink);
+			if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
+				inode->i_nlink = i_nlink;
+		}
+	}
 	inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
 	if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
 		char type = 0;
@@ -976,7 +994,7 @@
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	if (!v9fs_extended(v9ses))
+	if (!v9fs_proto_dotu(v9ses))
 		return -EBADF;
 
 	st = p9_client_stat(fid);
@@ -1066,7 +1084,7 @@
 	struct p9_fid *fid;
 
 	v9ses = v9fs_inode2v9ses(dir);
-	if (!v9fs_extended(v9ses)) {
+	if (!v9fs_proto_dotu(v9ses)) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "not extended\n");
 		return -EPERM;
 	}
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 9cc1877..2ff622f 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -121,7 +121,7 @@
 
 /* Inode stuff */
 struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
-int adfs_write_inode(struct inode *inode,int unused);
+int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
 
 /* map.c */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 3f57ce4..0f5e309 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -9,6 +9,7 @@
  */
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include "adfs.h"
 
 /*
@@ -360,7 +361,7 @@
  * The adfs-specific inode data has already been updated by
  * adfs_notify_change()
  */
-int adfs_write_inode(struct inode *inode, int wait)
+int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct super_block *sb = inode->i_sb;
 	struct object_info obj;
@@ -375,7 +376,7 @@
 	obj.attr	= ADFS_I(inode)->attr;
 	obj.size	= inode->i_size;
 
-	ret = adfs_dir_update(sb, &obj, wait);
+	ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 	return ret;
 }
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e40caa..861dae6 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -175,7 +175,8 @@
 extern void			 affs_clear_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
 					unsigned long ino);
-extern int			 affs_write_inode(struct inode *inode, int);
+extern int			 affs_write_inode(struct inode *inode,
+					struct writeback_control *wbc);
 extern int			 affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
 
 /* file.c */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3c4ec7d..c9744d7 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -166,7 +166,7 @@
 }
 
 int
-affs_write_inode(struct inode *inode, int unused)
+affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct super_block	*sb = inode->i_sb;
 	struct buffer_head	*bh;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6ece2a1..c54dad4 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -733,7 +733,6 @@
 			struct page *page, void *fsdata);
 extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern int afs_write_inode(struct inode *, int);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
 			      unsigned long, loff_t);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e1ea1c2..14f6431 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -48,7 +48,6 @@
 static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
-	.write_inode	= afs_write_inode,
 	.destroy_inode	= afs_destroy_inode,
 	.clear_inode	= afs_clear_inode,
 	.put_super	= afs_put_super,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 5e15a21..3bed54a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -585,27 +585,6 @@
 }
 
 /*
- * write an inode back
- */
-int afs_write_inode(struct inode *inode, int sync)
-{
-	struct afs_vnode *vnode = AFS_FS_I(inode);
-	int ret;
-
-	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
-
-	ret = 0;
-	if (sync) {
-		ret = filemap_fdatawait(inode->i_mapping);
-		if (ret < 0)
-			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
  * completion of write to server
  */
 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
diff --git a/fs/attr.c b/fs/attr.c
index 96d394b..0a6ea54c 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -12,7 +12,6 @@
 #include <linux/capability.h>
 #include <linux/fsnotify.h>
 #include <linux/fcntl.h>
-#include <linux/quotaops.h>
 #include <linux/security.h>
 
 /* Taken over from the old code... */
@@ -212,14 +211,8 @@
 		error = inode->i_op->setattr(dentry, attr);
 	} else {
 		error = inode_change_ok(inode, attr);
-		if (!error) {
-			if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-			    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
-				error = vfs_dq_transfer(inode, attr) ?
-					-EDQUOT : 0;
-			if (!error)
-				error = inode_setattr(inode, attr);
-		}
+		if (!error)
+			error = inode_setattr(inode, attr);
 	}
 
 	if (ia_valid & ATTR_SIZE)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 0118d67..3d283ab 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -60,11 +60,6 @@
 		current->pid, __func__, ##args);	\
 } while (0)
 
-struct rehash_entry {
-	struct task_struct *task;
-	struct list_head list;
-};
-
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
@@ -81,7 +76,6 @@
 
 	struct list_head active;
 	int active_count;
-	struct list_head rehash_list;
 
 	struct list_head expiring;
 
@@ -104,7 +98,6 @@
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
 #define AUTOFS_INF_MOUNTPOINT	(1<<1) /* mountpoint status for direct expire */
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
-#define AUTOFS_INF_REHASH	(1<<3) /* dentry in transit to ->lookup() */
 
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 00bf8fc..c8a80df 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -544,10 +544,9 @@
 			goto out;
 		devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
 		err = 0;
-		if (path.dentry->d_inode &&
-		    path.mnt->mnt_root == path.dentry) {
+		if (path.mnt->mnt_root == path.dentry) {
 			err = 1;
-			magic = path.dentry->d_inode->i_sb->s_magic;
+			magic = path.mnt->mnt_sb->s_magic;
 		}
 	} else {
 		dev_t dev = sbi->sb->s_dev;
@@ -560,10 +559,8 @@
 
 		err = have_submounts(path.dentry);
 
-		if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
-			if (follow_down(&path))
-				magic = path.mnt->mnt_sb->s_magic;
-		}
+		if (follow_down(&path))
+			magic = path.mnt->mnt_sb->s_magic;
 	}
 
 	param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 74bc9aa..a796c94 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -279,7 +279,6 @@
 			root->d_mounted--;
 		}
 		ino->flags |= AUTOFS_INF_EXPIRING;
-		autofs4_add_expiring(root);
 		init_completion(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		return root;
@@ -407,7 +406,6 @@
 		expired, (int)expired->d_name.len, expired->d_name.name);
 	ino = autofs4_dentry_ino(expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
-	autofs4_add_expiring(expired);
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	spin_lock(&dcache_lock);
@@ -435,7 +433,7 @@
 
 		DPRINTK("expire done status=%d", status);
 
-		if (d_unhashed(dentry) && IS_DEADDIR(dentry->d_inode))
+		if (d_unhashed(dentry))
 			return -EAGAIN;
 
 		return status;
@@ -475,7 +473,6 @@
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	autofs4_del_expiring(dentry);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -506,7 +503,6 @@
 			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
 		}
 		ino->flags &= ~AUTOFS_INF_EXPIRING;
-		autofs4_del_expiring(dentry);
 		complete_all(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d0a3de2..821b2b9 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -49,7 +49,6 @@
 		ino->dentry = NULL;
 		ino->size = 0;
 		INIT_LIST_HEAD(&ino->active);
-		INIT_LIST_HEAD(&ino->rehash_list);
 		ino->active_count = 0;
 		INIT_LIST_HEAD(&ino->expiring);
 		atomic_set(&ino->count, 0);
@@ -97,63 +96,6 @@
 	kfree(ino);
 }
 
-/*
- * Deal with the infamous "Busy inodes after umount ..." message.
- *
- * Clean up the dentry tree. This happens with autofs if the user
- * space program goes away due to a SIGKILL, SIGSEGV etc.
- */
-static void autofs4_force_release(struct autofs_sb_info *sbi)
-{
-	struct dentry *this_parent = sbi->sb->s_root;
-	struct list_head *next;
-
-	if (!sbi->sb->s_root)
-		return;
-
-	spin_lock(&dcache_lock);
-repeat:
-	next = this_parent->d_subdirs.next;
-resume:
-	while (next != &this_parent->d_subdirs) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
-		/* Negative dentry - don`t care */
-		if (!simple_positive(dentry)) {
-			next = next->next;
-			continue;
-		}
-
-		if (!list_empty(&dentry->d_subdirs)) {
-			this_parent = dentry;
-			goto repeat;
-		}
-
-		next = next->next;
-		spin_unlock(&dcache_lock);
-
-		DPRINTK("dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
-
-		dput(dentry);
-		spin_lock(&dcache_lock);
-	}
-
-	if (this_parent != sbi->sb->s_root) {
-		struct dentry *dentry = this_parent;
-
-		next = this_parent->d_u.d_child.next;
-		this_parent = this_parent->d_parent;
-		spin_unlock(&dcache_lock);
-		DPRINTK("parent dentry %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
-		dput(dentry);
-		spin_lock(&dcache_lock);
-		goto resume;
-	}
-	spin_unlock(&dcache_lock);
-}
-
 void autofs4_kill_sb(struct super_block *sb)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(sb);
@@ -170,15 +112,12 @@
 	/* Free wait queues, close pipe */
 	autofs4_catatonic_mode(sbi);
 
-	/* Clean up and release dangling references */
-	autofs4_force_release(sbi);
-
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 
 out_kill_sb:
 	DPRINTK("shutting down");
-	kill_anon_super(sb);
+	kill_litter_super(sb);
 }
 
 static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 30cc9dd..a015b49 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -104,99 +104,6 @@
 	return;
 }
 
-static void autofs4_add_rehash_entry(struct autofs_info *ino,
-				     struct rehash_entry *entry)
-{
-	entry->task = current;
-	INIT_LIST_HEAD(&entry->list);
-	list_add(&entry->list, &ino->rehash_list);
-	return;
-}
-
-static void autofs4_remove_rehash_entry(struct autofs_info *ino)
-{
-	struct list_head *head = &ino->rehash_list;
-	struct rehash_entry *entry;
-	list_for_each_entry(entry, head, list) {
-		if (entry->task == current) {
-			list_del(&entry->list);
-			kfree(entry);
-			break;
-		}
-	}
-	return;
-}
-
-static void autofs4_remove_rehash_entrys(struct autofs_info *ino)
-{
-	struct autofs_sb_info *sbi = ino->sbi;
-	struct rehash_entry *entry, *next;
-	struct list_head *head;
-
-	spin_lock(&sbi->fs_lock);
-	spin_lock(&sbi->lookup_lock);
-	if (!(ino->flags & AUTOFS_INF_REHASH)) {
-		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&sbi->fs_lock);
-		return;
-	}
-	ino->flags &= ~AUTOFS_INF_REHASH;
-	head = &ino->rehash_list;
-	list_for_each_entry_safe(entry, next, head, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
-	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&sbi->fs_lock);
-	dput(ino->dentry);
-
-	return;
-}
-
-static void autofs4_revalidate_drop(struct dentry *dentry,
-				    struct rehash_entry *entry)
-{
-	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	/*
-	 * Add to the active list so we can pick this up in
-	 * ->lookup(). Also add an entry to a rehash list so
-	 * we know when there are no dentrys in flight so we
-	 * know when we can rehash the dentry.
-	 */
-	spin_lock(&sbi->lookup_lock);
-	if (list_empty(&ino->active))
-		list_add(&ino->active, &sbi->active_list);
-	autofs4_add_rehash_entry(ino, entry);
-	spin_unlock(&sbi->lookup_lock);
-	if (!(ino->flags & AUTOFS_INF_REHASH)) {
-		ino->flags |= AUTOFS_INF_REHASH;
-		dget(dentry);
-		spin_lock(&dentry->d_lock);
-		__d_drop(dentry);
-		spin_unlock(&dentry->d_lock);
-	}
-	return;
-}
-
-static void autofs4_revalidate_rehash(struct dentry *dentry)
-{
-	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
-	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	if (ino->flags & AUTOFS_INF_REHASH) {
-		spin_lock(&sbi->lookup_lock);
-		autofs4_remove_rehash_entry(ino);
-		if (list_empty(&ino->rehash_list)) {
-			spin_unlock(&sbi->lookup_lock);
-			ino->flags &= ~AUTOFS_INF_REHASH;
-			d_rehash(dentry);
-			dput(ino->dentry);
-		} else
-			spin_unlock(&sbi->lookup_lock);
-	}
-	return;
-}
-
 static unsigned int autofs4_need_mount(unsigned int flags)
 {
 	unsigned int res = 0;
@@ -236,7 +143,7 @@
 	return dcache_dir_open(inode, file);
 }
 
-static int try_to_fill_dentry(struct dentry *dentry)
+static int try_to_fill_dentry(struct dentry *dentry, int flags)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -249,17 +156,55 @@
 	 * Wait for a pending mount, triggering one if there
 	 * isn't one already
 	 */
-	DPRINTK("waiting for mount name=%.*s",
-		 dentry->d_name.len, dentry->d_name.name);
+	if (dentry->d_inode == NULL) {
+		DPRINTK("waiting for mount name=%.*s",
+			 dentry->d_name.len, dentry->d_name.name);
 
-	status = autofs4_wait(sbi, dentry, NFY_MOUNT);
+		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 
-	DPRINTK("mount done status=%d", status);
+		DPRINTK("mount done status=%d", status);
 
-	/* Update expiry counter */
-	ino->last_used = jiffies;
+		/* Turn this into a real negative dentry? */
+		if (status == -ENOENT) {
+			spin_lock(&sbi->fs_lock);
+			ino->flags &= ~AUTOFS_INF_PENDING;
+			spin_unlock(&sbi->fs_lock);
+			return status;
+		} else if (status) {
+			/* Return a negative dentry, but leave it "pending" */
+			return status;
+		}
+	/* Trigger mount for path component or follow link */
+	} else if (ino->flags & AUTOFS_INF_PENDING ||
+			autofs4_need_mount(flags) ||
+			current->link_count) {
+		DPRINTK("waiting for mount name=%.*s",
+			dentry->d_name.len, dentry->d_name.name);
 
-	return status;
+		spin_lock(&sbi->fs_lock);
+		ino->flags |= AUTOFS_INF_PENDING;
+		spin_unlock(&sbi->fs_lock);
+		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
+
+		DPRINTK("mount done status=%d", status);
+
+		if (status) {
+			spin_lock(&sbi->fs_lock);
+			ino->flags &= ~AUTOFS_INF_PENDING;
+			spin_unlock(&sbi->fs_lock);
+			return status;
+		}
+	}
+
+	/* Initialize expiry counter after successful mount */
+	if (ino)
+		ino->last_used = jiffies;
+
+	spin_lock(&sbi->fs_lock);
+	ino->flags &= ~AUTOFS_INF_PENDING;
+	spin_unlock(&sbi->fs_lock);
+
+	return 0;
 }
 
 /* For autofs direct mounts the follow link triggers the mount */
@@ -313,16 +258,10 @@
 	 */
 	if (ino->flags & AUTOFS_INF_PENDING ||
 	    (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
-		ino->flags |= AUTOFS_INF_PENDING;
 		spin_unlock(&dcache_lock);
 		spin_unlock(&sbi->fs_lock);
 
-		status = try_to_fill_dentry(dentry);
-
-		spin_lock(&sbi->fs_lock);
-		ino->flags &= ~AUTOFS_INF_PENDING;
-		spin_unlock(&sbi->fs_lock);
-
+		status = try_to_fill_dentry(dentry, 0);
 		if (status)
 			goto out_error;
 
@@ -361,47 +300,18 @@
 {
 	struct inode *dir = dentry->d_parent->d_inode;
 	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
-	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	struct rehash_entry *entry;
+	int oz_mode = autofs4_oz_mode(sbi);
 	int flags = nd ? nd->flags : 0;
-	unsigned int mutex_aquired;
+	int status = 1;
 
-	DPRINTK("name = %.*s oz_mode = %d",
-		dentry->d_name.len, dentry->d_name.name, oz_mode);
-
-	/* Daemon never causes a mount to trigger */
-	if (autofs4_oz_mode(sbi))
-		return 1;
-
-	entry = kmalloc(sizeof(struct rehash_entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	mutex_aquired = mutex_trylock(&dir->i_mutex);
-
-	spin_lock(&sbi->fs_lock);
-	spin_lock(&dcache_lock);
 	/* Pending dentry */
+	spin_lock(&sbi->fs_lock);
 	if (autofs4_ispending(dentry)) {
-		int status;
-
-		/*
-		 * We can only unhash and send this to ->lookup() if
-		 * the directory mutex is held over d_revalidate() and
-		 * ->lookup(). This prevents the VFS from incorrectly
-		 * seeing the dentry as non-existent.
-		 */
-		ino->flags |= AUTOFS_INF_PENDING;
-		if (!mutex_aquired) {
-			autofs4_revalidate_drop(dentry, entry);
-			spin_unlock(&dcache_lock);
-			spin_unlock(&sbi->fs_lock);
-			return 0;
-		}
-		spin_unlock(&dcache_lock);
+		/* The daemon never causes a mount to trigger */
 		spin_unlock(&sbi->fs_lock);
-		mutex_unlock(&dir->i_mutex);
-		kfree(entry);
+
+		if (oz_mode)
+			return 1;
 
 		/*
 		 * If the directory has gone away due to an expire
@@ -415,82 +325,45 @@
 		 * A zero status is success otherwise we have a
 		 * negative error code.
 		 */
-		status = try_to_fill_dentry(dentry);
-
-		spin_lock(&sbi->fs_lock);
-		ino->flags &= ~AUTOFS_INF_PENDING;
-		spin_unlock(&sbi->fs_lock);
-
+		status = try_to_fill_dentry(dentry, flags);
 		if (status == 0)
 			return 1;
 
 		return status;
 	}
+	spin_unlock(&sbi->fs_lock);
+
+	/* Negative dentry.. invalidate if "old" */
+	if (dentry->d_inode == NULL)
+		return 0;
 
 	/* Check for a non-mountpoint directory with no contents */
+	spin_lock(&dcache_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
+		spin_unlock(&dcache_lock);
 
-		if (autofs4_need_mount(flags) || current->link_count) {
-			int status;
+		/* The daemon never causes a mount to trigger */
+		if (oz_mode)
+			return 1;
 
-			/*
-			 * We can only unhash and send this to ->lookup() if
-			 * the directory mutex is held over d_revalidate() and
-			 * ->lookup(). This prevents the VFS from incorrectly
-			 * seeing the dentry as non-existent.
-			 */
-			ino->flags |= AUTOFS_INF_PENDING;
-			if (!mutex_aquired) {
-				autofs4_revalidate_drop(dentry, entry);
-				spin_unlock(&dcache_lock);
-				spin_unlock(&sbi->fs_lock);
-				return 0;
-			}
-			spin_unlock(&dcache_lock);
-			spin_unlock(&sbi->fs_lock);
-			mutex_unlock(&dir->i_mutex);
-			kfree(entry);
+		/*
+		 * A zero status is success otherwise we have a
+		 * negative error code.
+		 */
+		status = try_to_fill_dentry(dentry, flags);
+		if (status == 0)
+			return 1;
 
-			/*
-			 * A zero status is success otherwise we have a
-			 * negative error code.
-			 */
-			status = try_to_fill_dentry(dentry);
-
-			spin_lock(&sbi->fs_lock);
-			ino->flags &= ~AUTOFS_INF_PENDING;
-			spin_unlock(&sbi->fs_lock);
-
-			if (status == 0)
-				return 1;
-
-			return status;
-		}
+		return status;
 	}
 	spin_unlock(&dcache_lock);
-	spin_unlock(&sbi->fs_lock);
-
-	if (mutex_aquired)
-		mutex_unlock(&dir->i_mutex);
-
-	kfree(entry);
 
 	return 1;
 }
 
-static void autofs4_free_rehash_entrys(struct autofs_info *inf)
-{
-	struct list_head *head = &inf->rehash_list;
-	struct rehash_entry *entry, *next;
-	list_for_each_entry_safe(entry, next, head, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
-}
-
 void autofs4_dentry_release(struct dentry *de)
 {
 	struct autofs_info *inf;
@@ -509,8 +382,6 @@
 				list_del(&inf->active);
 			if (!list_empty(&inf->expiring))
 				list_del(&inf->expiring);
-			if (!list_empty(&inf->rehash_list))
-				autofs4_free_rehash_entrys(inf);
 			spin_unlock(&sbi->lookup_lock);
 		}
 
@@ -543,7 +414,6 @@
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-restart:
 	spin_lock(&dcache_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
@@ -561,19 +431,6 @@
 		if (atomic_read(&active->d_count) == 0)
 			goto next;
 
-		if (active->d_inode && IS_DEADDIR(active->d_inode)) {
-			if (!list_empty(&ino->rehash_list)) {
-				dget(active);
-				spin_unlock(&active->d_lock);
-				spin_unlock(&sbi->lookup_lock);
-				spin_unlock(&dcache_lock);
-				autofs4_remove_rehash_entrys(ino);
-				dput(active);
-				goto restart;
-			}
-			goto next;
-		}
-
 		qstr = &active->d_name;
 
 		if (active->d_name.hash != hash)
@@ -586,11 +443,13 @@
 		if (memcmp(qstr->name, str, len))
 			goto next;
 
-		dget(active);
-		spin_unlock(&active->d_lock);
-		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&dcache_lock);
-		return active;
+		if (d_unhashed(active)) {
+			dget(active);
+			spin_unlock(&active->d_lock);
+			spin_unlock(&sbi->lookup_lock);
+			spin_unlock(&dcache_lock);
+			return active;
+		}
 next:
 		spin_unlock(&active->d_lock);
 	}
@@ -639,11 +498,13 @@
 		if (memcmp(qstr->name, str, len))
 			goto next;
 
-		dget(expiring);
-		spin_unlock(&expiring->d_lock);
-		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&dcache_lock);
-		return expiring;
+		if (d_unhashed(expiring)) {
+			dget(expiring);
+			spin_unlock(&expiring->d_lock);
+			spin_unlock(&sbi->lookup_lock);
+			spin_unlock(&dcache_lock);
+			return expiring;
+		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
@@ -653,48 +514,6 @@
 	return NULL;
 }
 
-static struct autofs_info *init_new_dentry(struct autofs_sb_info *sbi,
-					   struct dentry *dentry, int oz_mode)
-{
-	struct autofs_info *ino;
-
-	/*
-	 * Mark the dentry incomplete but don't hash it. We do this
-	 * to serialize our inode creation operations (symlink and
-	 * mkdir) which prevents deadlock during the callback to
-	 * the daemon. Subsequent user space lookups for the same
-	 * dentry are placed on the wait queue while the daemon
-	 * itself is allowed passage unresticted so the create
-	 * operation itself can then hash the dentry. Finally,
-	 * we check for the hashed dentry and return the newly
-	 * hashed dentry.
-	 */
-	dentry->d_op = &autofs4_root_dentry_operations;
-
-	/*
-	 * And we need to ensure that the same dentry is used for
-	 * all following lookup calls until it is hashed so that
-	 * the dentry flags are persistent throughout the request.
-	 */
-	ino = autofs4_init_ino(NULL, sbi, 0555);
-	if (!ino)
-		return ERR_PTR(-ENOMEM);
-
-	dentry->d_fsdata = ino;
-	ino->dentry = dentry;
-
-	/*
-	 * Only set the mount pending flag for new dentrys not created
-	 * by the daemon.
-	 */
-	if (!oz_mode)
-		ino->flags |= AUTOFS_INF_PENDING;
-
-	d_instantiate(dentry, NULL);
-
-	return ino;
-}
-
 /* Lookups in the root directory */
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
@@ -702,7 +521,6 @@
 	struct autofs_info *ino;
 	struct dentry *expiring, *active;
 	int oz_mode;
-	int status = 0;
 
 	DPRINTK("name = %.*s",
 		dentry->d_name.len, dentry->d_name.name);
@@ -717,26 +535,44 @@
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
-	spin_lock(&sbi->fs_lock);
 	active = autofs4_lookup_active(dentry);
 	if (active) {
 		dentry = active;
 		ino = autofs4_dentry_ino(dentry);
-		/* If this came from revalidate, rehash it */
-		autofs4_revalidate_rehash(dentry);
-		spin_unlock(&sbi->fs_lock);
 	} else {
-		spin_unlock(&sbi->fs_lock);
-		ino = init_new_dentry(sbi, dentry, oz_mode);
-		if (IS_ERR(ino))
-			return (struct dentry *) ino;
+		/*
+		 * Mark the dentry incomplete but don't hash it. We do this
+		 * to serialize our inode creation operations (symlink and
+		 * mkdir) which prevents deadlock during the callback to
+		 * the daemon. Subsequent user space lookups for the same
+		 * dentry are placed on the wait queue while the daemon
+		 * itself is allowed passage unresticted so the create
+		 * operation itself can then hash the dentry. Finally,
+		 * we check for the hashed dentry and return the newly
+		 * hashed dentry.
+		 */
+		dentry->d_op = &autofs4_root_dentry_operations;
+
+		/*
+		 * And we need to ensure that the same dentry is used for
+		 * all following lookup calls until it is hashed so that
+		 * the dentry flags are persistent throughout the request.
+		 */
+		ino = autofs4_init_ino(NULL, sbi, 0555);
+		if (!ino)
+			return ERR_PTR(-ENOMEM);
+
+		dentry->d_fsdata = ino;
+		ino->dentry = dentry;
+
+		autofs4_add_active(dentry);
+
+		d_instantiate(dentry, NULL);
 	}
 
-	autofs4_add_active(dentry);
-
 	if (!oz_mode) {
-		expiring = autofs4_lookup_expiring(dentry);
 		mutex_unlock(&dir->i_mutex);
+		expiring = autofs4_lookup_expiring(dentry);
 		if (expiring) {
 			/*
 			 * If we are racing with expire the request might not
@@ -744,22 +580,23 @@
 			 * so it must have been successful, so just wait for it.
 			 */
 			autofs4_expire_wait(expiring);
+			autofs4_del_expiring(expiring);
 			dput(expiring);
 		}
-		status = try_to_fill_dentry(dentry);
-		mutex_lock(&dir->i_mutex);
+
 		spin_lock(&sbi->fs_lock);
-		ino->flags &= ~AUTOFS_INF_PENDING;
+		ino->flags |= AUTOFS_INF_PENDING;
 		spin_unlock(&sbi->fs_lock);
+		if (dentry->d_op && dentry->d_op->d_revalidate)
+			(dentry->d_op->d_revalidate)(dentry, nd);
+		mutex_lock(&dir->i_mutex);
 	}
 
-	autofs4_del_active(dentry);
-
 	/*
-	 * If we had a mount fail, check if we had to handle
+	 * If we are still pending, check if we had to handle
 	 * a signal. If so we can force a restart..
 	 */
-	if (status) {
+	if (ino->flags & AUTOFS_INF_PENDING) {
 		/* See if we were interrupted */
 		if (signal_pending(current)) {
 			sigset_t *sigset = &current->pending.signal;
@@ -771,46 +608,43 @@
 			    return ERR_PTR(-ERESTARTNOINTR);
 			}
 		}
-	}
-
-	/*
-	 * User space can (and has done in the past) remove and re-create
-	 * this directory during the callback. This can leave us with an
-	 * unhashed dentry, but a successful mount!  So we need to
-	 * perform another cached lookup in case the dentry now exists.
-	 */
-	if (!oz_mode && !have_submounts(dentry)) {
-		struct dentry *new;
-		new = d_lookup(dentry->d_parent, &dentry->d_name);
-		if (new) {
-			if (active)
-				dput(active);
-			return new;
-		} else {
-			if (!status)
-				status = -ENOENT;
+		if (!oz_mode) {
+			spin_lock(&sbi->fs_lock);
+			ino->flags &= ~AUTOFS_INF_PENDING;
+			spin_unlock(&sbi->fs_lock);
 		}
 	}
 
 	/*
-	 * If we had a mount failure, return status to user space.
-	 * If the mount succeeded and we used a dentry from the active queue
-	 * return it.
+	 * If this dentry is unhashed, then we shouldn't honour this
+	 * lookup.  Returning ENOENT here doesn't do the right thing
+	 * for all system calls, but it should be OK for the operations
+	 * we permit from an autofs.
 	 */
-	if (status) {
-		dentry = ERR_PTR(status);
+	if (!oz_mode && d_unhashed(dentry)) {
+		/*
+		 * A user space application can (and has done in the past)
+		 * remove and re-create this directory during the callback.
+		 * This can leave us with an unhashed dentry, but a
+		 * successful mount!  So we need to perform another
+		 * cached lookup in case the dentry now exists.
+		 */
+		struct dentry *parent = dentry->d_parent;
+		struct dentry *new = d_lookup(parent, &dentry->d_name);
+		if (new != NULL)
+			dentry = new;
+		else
+			dentry = ERR_PTR(-ENOENT);
+
 		if (active)
 			dput(active);
+
 		return dentry;
-	} else {
-		/*
-		 * Valid successful mount, return active dentry or NULL
-		 * for a new dentry.
-		 */
-		if (active)
-			return active;
 	}
 
+	if (active)
+		return active;
+
 	return NULL;
 }
 
@@ -834,6 +668,8 @@
 	if (!ino)
 		return -ENOMEM;
 
+	autofs4_del_active(dentry);
+
 	ino->size = strlen(symname);
 	cp = kmalloc(ino->size + 1, GFP_KERNEL);
 	if (!cp) {
@@ -910,6 +746,7 @@
 	dir->i_mtime = CURRENT_TIME;
 
 	spin_lock(&dcache_lock);
+	autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -935,6 +772,7 @@
 		spin_unlock(&dcache_lock);
 		return -ENOTEMPTY;
 	}
+	autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -972,6 +810,8 @@
 	if (!ino)
 		return -ENOMEM;
 
+	autofs4_del_active(dentry);
+
 	inode = autofs4_get_inode(dir->i_sb, ino);
 	if (!inode) {
 		if (!dentry->d_fsdata)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8f3d9fd..f22a7d3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
@@ -98,7 +99,7 @@
 	return ERR_PTR(-EIO);
 }
 
-static int bfs_write_inode(struct inode *inode, int wait)
+static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 	unsigned int ino = (u16)inode->i_ino;
@@ -147,7 +148,7 @@
 	di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
 
 	mark_buffer_dirty(bh);
-	if (wait) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh))
 			err = -EIO;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2aa8ec6..8b5cfdd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2326,7 +2326,7 @@
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
-int btrfs_write_inode(struct inode *inode, int wait);
+int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4deb280..c41db6d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3968,7 +3968,7 @@
 	return ret;
 }
 
-int btrfs_write_inode(struct inode *inode, int wait)
+int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
@@ -3977,7 +3977,7 @@
 	if (root->fs_info->btree_inode == inode)
 		return 0;
 
-	if (wait) {
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		trans = btrfs_join_transaction(root, 1);
 		btrfs_set_trans_block_group(trans, inode);
 		ret = btrfs_commit_transaction(trans, root);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 057e1da..3d8f8a9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2289,9 +2289,9 @@
 	if (inode && S_ISREG(inode->i_mode)) {
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 		if (cinode->clientCanCacheAll == 0)
-			break_lease(inode, FMODE_READ);
+			break_lease(inode, O_RDONLY);
 		else if (cinode->clientCanCacheRead == 0)
-			break_lease(inode, FMODE_WRITE);
+			break_lease(inode, O_WRONLY);
 #endif
 		rc = filemap_fdatawrite(inode->i_mapping);
 		if (cinode->clientCanCacheRead == 0) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 953173a..f1358e5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,6 +257,7 @@
 	if (dentry)
 		goto repeat;
 }
+EXPORT_SYMBOL(dput);
 
 /**
  * d_invalidate - invalidate a dentry
@@ -314,6 +315,7 @@
 	spin_unlock(&dcache_lock);
 	return 0;
 }
+EXPORT_SYMBOL(d_invalidate);
 
 /* This should be called _only_ with dcache_lock held */
 
@@ -328,6 +330,7 @@
 {
 	return __dget_locked(dentry);
 }
+EXPORT_SYMBOL(dget_locked);
 
 /**
  * d_find_alias - grab a hashed alias of inode
@@ -384,6 +387,7 @@
 	}
 	return de;
 }
+EXPORT_SYMBOL(d_find_alias);
 
 /*
  *	Try to kill dentries associated with this inode.
@@ -408,6 +412,7 @@
 	}
 	spin_unlock(&dcache_lock);
 }
+EXPORT_SYMBOL(d_prune_aliases);
 
 /*
  * Throw away a dentry - free the inode, dput the parent.  This requires that
@@ -610,6 +615,7 @@
 {
 	__shrink_dcache_sb(sb, NULL, 0);
 }
+EXPORT_SYMBOL(shrink_dcache_sb);
 
 /*
  * destroy a single subtree of dentries for unmount
@@ -792,6 +798,7 @@
 	spin_unlock(&dcache_lock);
 	return 1;
 }
+EXPORT_SYMBOL(have_submounts);
 
 /*
  * Search the dentry child list for the specified parent,
@@ -876,6 +883,7 @@
 	while ((found = select_parent(parent)) != 0)
 		__shrink_dcache_sb(sb, &found, 0);
 }
+EXPORT_SYMBOL(shrink_dcache_parent);
 
 /*
  * Scan `nr' dentries and return the number which remain.
@@ -968,6 +976,7 @@
 
 	return dentry;
 }
+EXPORT_SYMBOL(d_alloc);
 
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 {
@@ -1012,6 +1021,7 @@
 	spin_unlock(&dcache_lock);
 	security_d_instantiate(entry, inode);
 }
+EXPORT_SYMBOL(d_instantiate);
 
 /**
  * d_instantiate_unique - instantiate a non-aliased dentry
@@ -1108,6 +1118,7 @@
 	}
 	return res;
 }
+EXPORT_SYMBOL(d_alloc_root);
 
 static inline struct hlist_head *d_hash(struct dentry *parent,
 					unsigned long hash)
@@ -1211,7 +1222,6 @@
 			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
 			spin_unlock(&dcache_lock);
 			security_d_instantiate(new, inode);
-			d_rehash(dentry);
 			d_move(new, dentry);
 			iput(inode);
 		} else {
@@ -1225,6 +1235,7 @@
 		d_add(dentry, inode);
 	return new;
 }
+EXPORT_SYMBOL(d_splice_alias);
 
 /**
  * d_add_ci - lookup or allocate new dentry with case-exact name
@@ -1314,6 +1325,7 @@
 	iput(inode);
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL(d_add_ci);
 
 /**
  * d_lookup - search for a dentry
@@ -1357,6 +1369,7 @@
 	} while (read_seqretry(&rename_lock, seq));
 	return dentry;
 }
+EXPORT_SYMBOL(d_lookup);
 
 struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
 {
@@ -1483,6 +1496,7 @@
 out:
 	return 0;
 }
+EXPORT_SYMBOL(d_validate);
 
 /*
  * When a file is deleted, we have two options:
@@ -1528,6 +1542,7 @@
 
 	fsnotify_nameremove(dentry, isdir);
 }
+EXPORT_SYMBOL(d_delete);
 
 static void __d_rehash(struct dentry * entry, struct hlist_head *list)
 {
@@ -1556,6 +1571,7 @@
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
+EXPORT_SYMBOL(d_rehash);
 
 /*
  * When switching names, the actual string doesn't strictly have to
@@ -1702,6 +1718,7 @@
 	d_move_locked(dentry, target);
 	spin_unlock(&dcache_lock);
 }
+EXPORT_SYMBOL(d_move);
 
 /**
  * d_ancestor - search for an ancestor
@@ -1868,6 +1885,7 @@
 	spin_unlock(&dcache_lock);
 	BUG();
 }
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 
 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
 {
@@ -2005,6 +2023,7 @@
 	path_put(&root);
 	return res;
 }
+EXPORT_SYMBOL(d_path);
 
 /*
  * Helper function for dentry_operations.d_dname() members
@@ -2171,6 +2190,30 @@
 	return result;
 }
 
+int path_is_under(struct path *path1, struct path *path2)
+{
+	struct vfsmount *mnt = path1->mnt;
+	struct dentry *dentry = path1->dentry;
+	int res;
+	spin_lock(&vfsmount_lock);
+	if (mnt != path2->mnt) {
+		for (;;) {
+			if (mnt->mnt_parent == mnt) {
+				spin_unlock(&vfsmount_lock);
+				return 0;
+			}
+			if (mnt->mnt_parent == path2->mnt)
+				break;
+			mnt = mnt->mnt_parent;
+		}
+		dentry = mnt->mnt_mountpoint;
+	}
+	res = is_subdir(dentry, path2->dentry);
+	spin_unlock(&vfsmount_lock);
+	return res;
+}
+EXPORT_SYMBOL(path_is_under);
+
 void d_genocide(struct dentry *root)
 {
 	struct dentry *this_parent = root;
@@ -2228,6 +2271,7 @@
 	}
 	return ino;
 }
+EXPORT_SYMBOL(find_inode_number);
 
 static __initdata unsigned long dhash_entries;
 static int __init set_dhash_entries(char *str)
@@ -2297,6 +2341,7 @@
 
 /* SLAB cache for __getname() consumers */
 struct kmem_cache *names_cachep __read_mostly;
+EXPORT_SYMBOL(names_cachep);
 
 EXPORT_SYMBOL(d_genocide);
 
@@ -2326,26 +2371,3 @@
 	bdev_cache_init();
 	chrdev_init();
 }
-
-EXPORT_SYMBOL(d_alloc);
-EXPORT_SYMBOL(d_alloc_root);
-EXPORT_SYMBOL(d_delete);
-EXPORT_SYMBOL(d_find_alias);
-EXPORT_SYMBOL(d_instantiate);
-EXPORT_SYMBOL(d_invalidate);
-EXPORT_SYMBOL(d_lookup);
-EXPORT_SYMBOL(d_move);
-EXPORT_SYMBOL_GPL(d_materialise_unique);
-EXPORT_SYMBOL(d_path);
-EXPORT_SYMBOL(d_prune_aliases);
-EXPORT_SYMBOL(d_rehash);
-EXPORT_SYMBOL(d_splice_alias);
-EXPORT_SYMBOL(d_add_ci);
-EXPORT_SYMBOL(d_validate);
-EXPORT_SYMBOL(dget_locked);
-EXPORT_SYMBOL(dput);
-EXPORT_SYMBOL(find_inode_number);
-EXPORT_SYMBOL(have_submounts);
-EXPORT_SYMBOL(names_cachep);
-EXPORT_SYMBOL(shrink_dcache_parent);
-EXPORT_SYMBOL(shrink_dcache_sb);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 274ac86..049d6c3 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -496,7 +496,7 @@
 	}
 	d_move(old_dentry, dentry);
 	fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
-		old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
+		S_ISDIR(old_dentry->d_inode->i_mode),
 		NULL, old_dentry);
 	fsnotify_oldname_free(old_name);
 	unlock_rename(new_dir, old_dir);
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1b178e..f0d5203 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -55,6 +55,8 @@
 /* exofs Application specific page/attribute */
 # define EXOFS_APAGE_FS_DATA	(OSD_APAGE_APP_DEFINED_FIRST + 3)
 # define EXOFS_ATTR_INODE_DATA	1
+# define EXOFS_ATTR_INODE_FILE_LAYOUT	2
+# define EXOFS_ATTR_INODE_DIR_LAYOUT	3
 
 /*
  * The maximum number of files we can have is limited by the size of the
@@ -206,4 +208,41 @@
 	(((name_len) + offsetof(struct exofs_dir_entry, name)  + \
 	  EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
 
+/*
+ * The on-disk (optional) layout structure.
+ * sits in an EXOFS_ATTR_INODE_FILE_LAYOUT or EXOFS_ATTR_INODE_DIR_LAYOUT
+ * attribute, attached to any inode, usually to a directory.
+ */
+
+enum exofs_inode_layout_gen_functions {
+	LAYOUT_MOVING_WINDOW = 0,
+	LAYOUT_IMPLICT = 1,
+};
+
+struct exofs_on_disk_inode_layout {
+	__le16 gen_func; /* One of enum exofs_inode_layout_gen_functions */
+	__le16 pad;
+	union {
+		/* gen_func == LAYOUT_MOVING_WINDOW (default) */
+		struct exofs_layout_sliding_window {
+			__le32 num_devices; /* first n devices in global-table*/
+		} sliding_window __packed;
+
+		/* gen_func == LAYOUT_IMPLICT */
+		struct exofs_layout_implict_list {
+			struct exofs_dt_data_map data_map;
+			/* Variable array of size data_map.cb_num_comps. These
+			 * are device indexes of the devices in the global table
+			 */
+			__le32 dev_indexes[];
+		} implict __packed;
+	};
+} __packed;
+
+static inline size_t exofs_on_disk_inode_layout_size(unsigned max_devs)
+{
+	return sizeof(struct exofs_on_disk_inode_layout) +
+		max_devs * sizeof(__le32);
+}
+
 #endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c35fd46..8442e35 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -55,12 +55,28 @@
 /* u64 has problems with printk this will cast it to unsigned long long */
 #define _LLU(x) (unsigned long long)(x)
 
+struct exofs_layout {
+	osd_id		s_pid;			/* partition ID of file system*/
+
+	/* Our way of looking at the data_map */
+	unsigned stripe_unit;
+	unsigned mirrors_p1;
+
+	unsigned group_width;
+	u64	 group_depth;
+	unsigned group_count;
+
+	enum exofs_inode_layout_gen_functions lay_func;
+
+	unsigned	s_numdevs;		/* Num of devices in array    */
+	struct osd_dev	*s_ods[0];		/* Variable length            */
+};
+
 /*
  * our extension to the in-memory superblock
  */
 struct exofs_sb_info {
 	struct exofs_fscb s_fscb;		/* Written often, pre-allocate*/
-	osd_id		s_pid;			/* partition ID of file system*/
 	int		s_timeout;		/* timeout for OSD operations */
 	uint64_t	s_nextid;		/* highest object ID used     */
 	uint32_t	s_numfiles;		/* number of files on fs      */
@@ -69,22 +85,27 @@
 	atomic_t	s_curr_pending;		/* number of pending commands */
 	uint8_t		s_cred[OSD_CAP_LEN];	/* credential for the fscb    */
 
-	struct pnfs_osd_data_map data_map;	/* Default raid to use        */
-	unsigned	s_numdevs;		/* Num of devices in array    */
-	struct osd_dev	*s_ods[1];		/* Variable length, minimum 1 */
+	struct pnfs_osd_data_map data_map;	/* Default raid to use
+						 * FIXME: Needed ?
+						 */
+/*	struct exofs_layout	dir_layout;*/	/* Default dir layout */
+	struct exofs_layout	layout;		/* Default files layout,
+						 * contains the variable osd_dev
+						 * array. Keep last */
+	struct osd_dev	*_min_one_dev[1];	/* Place holder for one dev   */
 };
 
 /*
  * our extension to the in-memory inode
  */
 struct exofs_i_info {
+	struct inode   vfs_inode;          /* normal in-memory inode          */
+	wait_queue_head_t i_wq;            /* wait queue for inode            */
 	unsigned long  i_flags;            /* various atomic flags            */
 	uint32_t       i_data[EXOFS_IDATA];/*short symlink names and device #s*/
 	uint32_t       i_dir_start_lookup; /* which page to start lookup      */
-	wait_queue_head_t i_wq;            /* wait queue for inode            */
 	uint64_t       i_commit_size;      /* the object's written length     */
 	uint8_t        i_cred[OSD_CAP_LEN];/* all-powerful credential         */
-	struct inode   vfs_inode;          /* normal in-memory inode          */
 };
 
 static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -101,7 +122,7 @@
 	void			*private;
 	exofs_io_done_fn	done;
 
-	struct exofs_sb_info	*sbi;
+	struct exofs_layout	*layout;
 	struct osd_obj_id	obj;
 	u8			*cred;
 
@@ -109,7 +130,11 @@
 	loff_t			offset;
 	unsigned long		length;
 	void			*kern_buff;
-	struct bio		*bio;
+
+	struct page		**pages;
+	unsigned		nr_pages;
+	unsigned		pgbase;
+	unsigned		pages_consumed;
 
 	/* Attributes */
 	unsigned		in_attr_len;
@@ -122,6 +147,9 @@
 	struct exofs_per_dev_state {
 		struct osd_request *or;
 		struct bio *bio;
+		loff_t offset;
+		unsigned length;
+		unsigned dev;
 	} per_dev[];
 };
 
@@ -175,6 +203,12 @@
 }
 
 /*
+ * Given a layout, object_number and stripe_index return the associated global
+ * dev_index
+ */
+unsigned exofs_layout_od_id(struct exofs_layout *layout,
+			    osd_id obj_no, unsigned layout_index);
+/*
  * Maximum count of links to a file
  */
 #define EXOFS_LINK_MAX           32000
@@ -189,7 +223,8 @@
 int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
 		    u64 offset, void *p, unsigned length);
 
-int  exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
+int  exofs_get_io_state(struct exofs_layout *layout,
+			struct exofs_io_state **ios);
 void exofs_put_io_state(struct exofs_io_state *ios);
 
 int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
@@ -226,7 +261,7 @@
 		struct page **pagep, void **fsdata);
 extern struct inode *exofs_iget(struct super_block *, unsigned long);
 struct inode *exofs_new_inode(struct inode *, int);
-extern int exofs_write_inode(struct inode *, int);
+extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
 extern void exofs_delete_inode(struct inode *);
 
 /* dir.c:                */
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 2afbceb..a17e4b7 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -41,16 +41,18 @@
 
 enum { BIO_MAX_PAGES_KMALLOC =
 		(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+	MAX_PAGES_KMALLOC =
+		PAGE_SIZE / sizeof(struct page *),
 };
 
 struct page_collect {
 	struct exofs_sb_info *sbi;
-	struct request_queue *req_q;
 	struct inode *inode;
 	unsigned expected_pages;
 	struct exofs_io_state *ios;
 
-	struct bio *bio;
+	struct page **pages;
+	unsigned alloc_pages;
 	unsigned nr_pages;
 	unsigned long length;
 	loff_t pg_first; /* keep 64bit also in 32-arches */
@@ -62,15 +64,12 @@
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 
 	pcol->sbi = sbi;
-	/* Create master bios on first Q, later on cloning, each clone will be
-	 * allocated on it's destination Q
-	 */
-	pcol->req_q = osd_request_queue(sbi->s_ods[0]);
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
 	pcol->ios = NULL;
-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -80,7 +79,8 @@
 {
 	pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
 
-	pcol->bio = NULL;
+	pcol->pages = NULL;
+	pcol->alloc_pages = 0;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
@@ -90,38 +90,43 @@
 	 * it might not end here. don't be left with nothing
 	 */
 	if (!pcol->expected_pages)
-		pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
+		pcol->expected_pages = MAX_PAGES_KMALLOC;
 }
 
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	int pages = min_t(unsigned, pcol->expected_pages,
-			  BIO_MAX_PAGES_KMALLOC);
+	unsigned pages = min_t(unsigned, pcol->expected_pages,
+			  MAX_PAGES_KMALLOC);
 
 	if (!pcol->ios) { /* First time allocate io_state */
-		int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
+		int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
 
 		if (ret)
 			return ret;
 	}
 
+	/* TODO: easily support bio chaining */
+	pages =  min_t(unsigned, pages,
+		       pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
+
 	for (; pages; pages >>= 1) {
-		pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
-		if (likely(pcol->bio))
+		pcol->pages = kmalloc(pages * sizeof(struct page *),
+				      GFP_KERNEL);
+		if (likely(pcol->pages)) {
+			pcol->alloc_pages = pages;
 			return 0;
+		}
 	}
 
-	EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
+	EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
 		  pcol->expected_pages);
 	return -ENOMEM;
 }
 
 static void pcol_free(struct page_collect *pcol)
 {
-	if (pcol->bio) {
-		bio_put(pcol->bio);
-		pcol->bio = NULL;
-	}
+	kfree(pcol->pages);
+	pcol->pages = NULL;
 
 	if (pcol->ios) {
 		exofs_put_io_state(pcol->ios);
@@ -132,11 +137,10 @@
 static int pcol_add_page(struct page_collect *pcol, struct page *page,
 			 unsigned len)
 {
-	int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
-	if (unlikely(len != added_len))
+	if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
 		return -ENOMEM;
 
-	++pcol->nr_pages;
+	pcol->pages[pcol->nr_pages++] = page;
 	pcol->length += len;
 	return 0;
 }
@@ -181,7 +185,6 @@
  */
 static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 {
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
@@ -193,13 +196,13 @@
 	else
 		good_bytes = pcol->length - resid;
 
-	EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
+	EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
 		     " length=0x%lx nr_pages=%u\n",
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
 
@@ -218,11 +221,11 @@
 		ret = update_read_page(page, page_stat);
 		if (do_unlock)
 			unlock_page(page);
-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}
 
 	pcol_free(pcol);
-	EXOFS_DBGMSG("readpages_done END\n");
+	EXOFS_DBGMSG2("readpages_done END\n");
 	return ret;
 }
 
@@ -238,11 +241,10 @@
 
 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 {
-	struct bio_vec *bvec;
 	int i;
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 
 		if (rw == READ)
 			update_read_page(page, ret);
@@ -260,13 +262,14 @@
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;
 
 	/* see comment in _readpage() about sync reads */
 	WARN_ON(is_sync && (pcol->nr_pages != 1));
 
-	ios->bio = pcol->bio;
+	ios->pages = pcol->pages;
+	ios->nr_pages = pcol->nr_pages;
 	ios->length = pcol->length;
 	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
@@ -290,7 +293,7 @@
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 
-	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
+	EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
 		  ios->obj.id, _LLU(ios->offset), pcol->length);
 
 	/* pages ownership was passed to pcol_copy */
@@ -366,7 +369,7 @@
 		goto try_again;
 	}
 
-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;
@@ -448,7 +451,6 @@
 static void writepages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
-	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64  good_bytes;
@@ -462,13 +464,13 @@
 	else
 		good_bytes = pcol->length - resid;
 
-	EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
+	EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
 		     " length=0x%lx nr_pages=%u\n",
 		     pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
 		     pcol->nr_pages);
 
-	__bio_for_each_segment(bvec, pcol->bio, i, 0) {
-		struct page *page = bvec->bv_page;
+	for (i = 0; i < pcol->nr_pages; i++) {
+		struct page *page = pcol->pages[i];
 		struct inode *inode = page->mapping->host;
 		int page_stat;
 
@@ -485,12 +487,12 @@
 		EXOFS_DBGMSG2("    writepages_done(0x%lx, 0x%lx) status=%d\n",
 			     inode->i_ino, page->index, page_stat);
 
-		length += bvec->bv_len;
+		length += PAGE_SIZE;
 	}
 
 	pcol_free(pcol);
 	kfree(pcol);
-	EXOFS_DBGMSG("writepages_done END\n");
+	EXOFS_DBGMSG2("writepages_done END\n");
 }
 
 static int write_exec(struct page_collect *pcol)
@@ -500,7 +502,7 @@
 	struct page_collect *pcol_copy = NULL;
 	int ret;
 
-	if (!pcol->bio)
+	if (!pcol->pages)
 		return 0;
 
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -512,9 +514,8 @@
 
 	*pcol_copy = *pcol;
 
-	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
-
-	ios->bio = pcol_copy->bio;
+	ios->pages = pcol_copy->pages;
+	ios->nr_pages = pcol_copy->nr_pages;
 	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
 	ios->length = pcol_copy->length;
 	ios->done = writepages_done;
@@ -527,7 +528,7 @@
 	}
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
-	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
+	EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
 		  pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
 		  pcol->length);
 	/* pages ownership was passed to pcol_copy */
@@ -605,7 +606,7 @@
 		goto try_again;
 	}
 
-	if (!pcol->bio) {
+	if (!pcol->pages) {
 		ret = pcol_try_alloc(pcol);
 		if (unlikely(ret))
 			goto fail;
@@ -616,7 +617,7 @@
 
 	ret = pcol_add_page(pcol, page, len);
 	if (unlikely(ret)) {
-		EXOFS_DBGMSG("Failed pcol_add_page "
+		EXOFS_DBGMSG2("Failed pcol_add_page "
 			     "nr_pages=%u total_length=0x%lx\n",
 			     pcol->nr_pages, pcol->length);
 
@@ -663,7 +664,7 @@
 	if (expected_pages < 32L)
 		expected_pages = 32L;
 
-	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
+	EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
 		     "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
 		     mapping->host->i_ino, wbc->range_start, wbc->range_end,
 		     mapping->nrpages, start, end, expected_pages);
@@ -859,20 +860,33 @@
 	return error;
 }
 
+static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
+	EXOFS_APAGE_FS_DATA,
+	EXOFS_ATTR_INODE_FILE_LAYOUT,
+	0);
+static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF(
+	EXOFS_APAGE_FS_DATA,
+	EXOFS_ATTR_INODE_DIR_LAYOUT,
+	0);
+
 /*
- * Read an inode from the OSD, and return it as is.  We also return the size
- * attribute in the 'obj_size' argument.
+ * Read the Linux inode info from the OSD, and return it as is. In exofs the
+ * inode info is in an application specific page/attribute of the osd-object.
  */
 static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
-		    struct exofs_fcb *inode, uint64_t *obj_size)
+		    struct exofs_fcb *inode)
 {
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct osd_attr attrs[2];
+	struct osd_attr attrs[] = {
+		[0] = g_attr_inode_data,
+		[1] = g_attr_inode_file_layout,
+		[2] = g_attr_inode_dir_layout,
+	};
 	struct exofs_io_state *ios;
+	struct exofs_on_disk_inode_layout *layout;
 	int ret;
 
-	*obj_size = ~0;
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
 		return ret;
@@ -882,14 +896,25 @@
 	exofs_make_credential(oi->i_cred, &ios->obj);
 	ios->cred = oi->i_cred;
 
-	attrs[0] = g_attr_inode_data;
-	attrs[1] = g_attr_logical_length;
+	attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+	attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+
 	ios->in_attr = attrs;
 	ios->in_attr_len = ARRAY_SIZE(attrs);
 
 	ret = exofs_sbi_read(ios);
-	if (ret)
+	if (unlikely(ret)) {
+		EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
+			  _LLU(ios->obj.id), ret);
+		memset(inode, 0, sizeof(*inode));
+		inode->i_mode = 0040000 | (0777 & ~022);
+		/* If object is lost on target we might as well enable it's
+		 * delete.
+		 */
+		if ((ret == -ENOENT) || (ret == -EINVAL))
+			ret = 0;
 		goto out;
+	}
 
 	ret = extract_attr_from_ios(ios, &attrs[0]);
 	if (ret) {
@@ -901,11 +926,33 @@
 
 	ret = extract_attr_from_ios(ios, &attrs[1]);
 	if (ret) {
-		EXOFS_ERR("%s: extract_attr of logical_length failed\n",
-			  __func__);
+		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
 		goto out;
 	}
-	*obj_size = get_unaligned_be64(attrs[1].val_ptr);
+	if (attrs[1].len) {
+		layout = attrs[1].val_ptr;
+		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
+			EXOFS_ERR("%s: unsupported files layout %d\n",
+				__func__, layout->gen_func);
+			ret = -ENOTSUPP;
+			goto out;
+		}
+	}
+
+	ret = extract_attr_from_ios(ios, &attrs[2]);
+	if (ret) {
+		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
+		goto out;
+	}
+	if (attrs[2].len) {
+		layout = attrs[2].val_ptr;
+		if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
+			EXOFS_ERR("%s: unsupported meta-data layout %d\n",
+				__func__, layout->gen_func);
+			ret = -ENOTSUPP;
+			goto out;
+		}
+	}
 
 out:
 	exofs_put_io_state(ios);
@@ -925,7 +972,6 @@
 	struct exofs_i_info *oi;
 	struct exofs_fcb fcb;
 	struct inode *inode;
-	uint64_t obj_size;
 	int ret;
 
 	inode = iget_locked(sb, ino);
@@ -937,7 +983,7 @@
 	__oi_init(oi);
 
 	/* read the inode from the osd */
-	ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
+	ret = exofs_get_inode(sb, oi, &fcb);
 	if (ret)
 		goto bad_inode;
 
@@ -958,13 +1004,6 @@
 	inode->i_blkbits = EXOFS_BLKSHIFT;
 	inode->i_generation = le32_to_cpu(fcb.i_generation);
 
-	if ((inode->i_size != obj_size) &&
-		(!exofs_inode_is_fast_symlink(inode))) {
-		EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
-			  inode->i_size, _LLU(obj_size));
-		/* FIXME: call exofs_inode_recovery() */
-	}
-
 	oi->i_dir_start_lookup = 0;
 
 	if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
@@ -1043,7 +1082,7 @@
 
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
-			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
+			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
 		/*TODO: When FS is corrupted creation can fail, object already
 		 * exist. Get rid of this asynchronous creation, if exist
 		 * increment the obj counter and try the next object. Until we
@@ -1104,7 +1143,7 @@
 
 	mark_inode_dirty(inode);
 
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
 		return ERR_PTR(ret);
@@ -1170,8 +1209,10 @@
 	int ret;
 
 	args = kzalloc(sizeof(*args), GFP_KERNEL);
-	if (!args)
+	if (!args) {
+		EXOFS_DBGMSG("Faild kzalloc of args\n");
 		return -ENOMEM;
+	}
 
 	fcb = &args->fcb;
 
@@ -1200,7 +1241,7 @@
 	} else
 		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
 
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
 		goto free_args;
@@ -1234,13 +1275,14 @@
 free_args:
 	kfree(args);
 out:
-	EXOFS_DBGMSG("ret=>%d\n", ret);
+	EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n",
+		     inode->i_ino, do_sync, ret);
 	return ret;
 }
 
-int exofs_write_inode(struct inode *inode, int wait)
+int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	return exofs_update_inode(inode, wait);
+	return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 }
 
 /*
@@ -1283,7 +1325,7 @@
 
 	clear_inode(inode);
 
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (unlikely(ret)) {
 		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
 		return;
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 5bad01f..5293bc4 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -23,9 +23,13 @@
  */
 
 #include <scsi/scsi_device.h>
+#include <asm/div64.h>
 
 #include "exofs.h"
 
+#define EXOFS_DBGMSG2(M...) do {} while (0)
+/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
+
 void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
 {
 	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
@@ -64,21 +68,24 @@
 	return ret;
 }
 
-int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
+int exofs_get_io_state(struct exofs_layout *layout,
+		       struct exofs_io_state **pios)
 {
 	struct exofs_io_state *ios;
 
 	/*TODO: Maybe use kmem_cach per sbi of size
-	 * exofs_io_state_size(sbi->s_numdevs)
+	 * exofs_io_state_size(layout->s_numdevs)
 	 */
-	ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL);
+	ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
 	if (unlikely(!ios)) {
+		EXOFS_DBGMSG("Faild kzalloc bytes=%d\n",
+			     exofs_io_state_size(layout->s_numdevs));
 		*pios = NULL;
 		return -ENOMEM;
 	}
 
-	ios->sbi = sbi;
-	ios->obj.partition = sbi->s_pid;
+	ios->layout = layout;
+	ios->obj.partition = layout->s_pid;
 	*pios = ios;
 	return 0;
 }
@@ -101,6 +108,29 @@
 	}
 }
 
+unsigned exofs_layout_od_id(struct exofs_layout *layout,
+			    osd_id obj_no, unsigned layout_index)
+{
+/*	switch (layout->lay_func) {
+	case LAYOUT_MOVING_WINDOW:
+	{*/
+		unsigned dev_mod = obj_no;
+
+		return (layout_index + dev_mod * layout->mirrors_p1) %
+							      layout->s_numdevs;
+/*	}
+	case LAYOUT_FUNC_IMPLICT:
+		return layout->devs[layout_index];
+	}*/
+}
+
+static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
+					   unsigned layout_index)
+{
+	return ios->layout->s_ods[
+		exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
+}
+
 static void _sync_done(struct exofs_io_state *ios, void *p)
 {
 	struct completion *waiting = p;
@@ -168,6 +198,21 @@
 	return ret;
 }
 
+static void _clear_bio(struct bio *bio)
+{
+	struct bio_vec *bv;
+	unsigned i;
+
+	__bio_for_each_segment(bv, bio, i, 0) {
+		unsigned this_count = bv->bv_len;
+
+		if (likely(PAGE_SIZE == this_count))
+			clear_highpage(bv->bv_page);
+		else
+			zero_user(bv->bv_page, bv->bv_offset, this_count);
+	}
+}
+
 int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 {
 	enum osd_err_priority acumulated_osd_err = 0;
@@ -176,16 +221,25 @@
 
 	for (i = 0; i < ios->numdevs; i++) {
 		struct osd_sense_info osi;
-		int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
+		struct osd_request *or = ios->per_dev[i].or;
+		int ret;
 
+		if (unlikely(!or))
+			continue;
+
+		ret = osd_req_decode_sense(or, &osi);
 		if (likely(!ret))
 			continue;
 
-		if (unlikely(ret == -EFAULT)) {
-			EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
-			/*FIXME: All the pages in this device range should:
-			 *	clear_highpage(page);
-			 */
+		if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
+			/* start read offset passed endof file */
+			_clear_bio(ios->per_dev[i].bio);
+			EXOFS_DBGMSG("start read offset passed end of file "
+				"offset=0x%llx, length=0x%llx\n",
+				_LLU(ios->per_dev[i].offset),
+				_LLU(ios->per_dev[i].length));
+
+			continue; /* we recovered */
 		}
 
 		if (osi.osd_err_pri >= acumulated_osd_err) {
@@ -205,14 +259,259 @@
 	return acumulated_lin_err;
 }
 
+/*
+ * L - logical offset into the file
+ *
+ * U - The number of bytes in a stripe within a group
+ *
+ *	U = stripe_unit * group_width
+ *
+ * T - The number of bytes striped within a group of component objects
+ *     (before advancing to the next group)
+ *
+ *	T = stripe_unit * group_width * group_depth
+ *
+ * S - The number of bytes striped across all component objects
+ *     before the pattern repeats
+ *
+ *	S = stripe_unit * group_width * group_depth * group_count
+ *
+ * M - The "major" (i.e., across all components) stripe number
+ *
+ *	M = L / S
+ *
+ * G - Counts the groups from the beginning of the major stripe
+ *
+ *	G = (L - (M * S)) / T	[or (L % S) / T]
+ *
+ * H - The byte offset within the group
+ *
+ *	H = (L - (M * S)) % T	[or (L % S) % T]
+ *
+ * N - The "minor" (i.e., across the group) stripe number
+ *
+ *	N = H / U
+ *
+ * C - The component index coresponding to L
+ *
+ *	C = (H - (N * U)) / stripe_unit + G * group_width
+ *	[or (L % U) / stripe_unit + G * group_width]
+ *
+ * O - The component offset coresponding to L
+ *
+ *	O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
+ */
+struct _striping_info {
+	u64 obj_offset;
+	u64 group_length;
+	u64 total_group_length;
+	u64 Major;
+	unsigned dev;
+	unsigned unit_off;
+};
+
+static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
+			      struct _striping_info *si)
+{
+	u32	stripe_unit = ios->layout->stripe_unit;
+	u32	group_width = ios->layout->group_width;
+	u64	group_depth = ios->layout->group_depth;
+
+	u32	U = stripe_unit * group_width;
+	u64	T = U * group_depth;
+	u64	S = T * ios->layout->group_count;
+	u64	M = div64_u64(file_offset, S);
+
+	/*
+	G = (L - (M * S)) / T
+	H = (L - (M * S)) % T
+	*/
+	u64	LmodS = file_offset - M * S;
+	u32	G = div64_u64(LmodS, T);
+	u64	H = LmodS - G * T;
+
+	u32	N = div_u64(H, U);
+
+	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
+	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
+	si->dev *= ios->layout->mirrors_p1;
+
+	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
+
+	si->obj_offset = si->unit_off + (N * stripe_unit) +
+				  (M * group_depth * stripe_unit);
+
+	si->group_length = T - H;
+	si->total_group_length = T;
+	si->Major = M;
+}
+
+static int _add_stripe_unit(struct exofs_io_state *ios,  unsigned *cur_pg,
+		unsigned pgbase, struct exofs_per_dev_state *per_dev,
+		int cur_len)
+{
+	unsigned pg = *cur_pg;
+	struct request_queue *q =
+			osd_request_queue(exofs_ios_od(ios, per_dev->dev));
+
+	per_dev->length += cur_len;
+
+	if (per_dev->bio == NULL) {
+		unsigned pages_in_stripe = ios->layout->group_width *
+					(ios->layout->stripe_unit / PAGE_SIZE);
+		unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
+						ios->layout->group_width;
+
+		per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
+		if (unlikely(!per_dev->bio)) {
+			EXOFS_DBGMSG("Faild to allocate BIO size=%u\n",
+				     bio_size);
+			return -ENOMEM;
+		}
+	}
+
+	while (cur_len > 0) {
+		unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
+		unsigned added_len;
+
+		BUG_ON(ios->nr_pages <= pg);
+		cur_len -= pglen;
+
+		added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
+					    pglen, pgbase);
+		if (unlikely(pglen != added_len))
+			return -ENOMEM;
+		pgbase = 0;
+		++pg;
+	}
+	BUG_ON(cur_len);
+
+	*cur_pg = pg;
+	return 0;
+}
+
+static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
+			      struct _striping_info *si, unsigned first_comp)
+{
+	unsigned stripe_unit = ios->layout->stripe_unit;
+	unsigned mirrors_p1 = ios->layout->mirrors_p1;
+	unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
+	unsigned dev = si->dev;
+	unsigned first_dev = dev - (dev % devs_in_group);
+	unsigned comp = first_comp + (dev - first_dev);
+	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
+	unsigned cur_pg = ios->pages_consumed;
+	int ret = 0;
+
+	while (length) {
+		struct exofs_per_dev_state *per_dev = &ios->per_dev[comp];
+		unsigned cur_len, page_off = 0;
+
+		if (!per_dev->length) {
+			per_dev->dev = dev;
+			if (dev < si->dev) {
+				per_dev->offset = si->obj_offset + stripe_unit -
+								   si->unit_off;
+				cur_len = stripe_unit;
+			} else if (dev == si->dev) {
+				per_dev->offset = si->obj_offset;
+				cur_len = stripe_unit - si->unit_off;
+				page_off = si->unit_off & ~PAGE_MASK;
+				BUG_ON(page_off && (page_off != ios->pgbase));
+			} else { /* dev > si->dev */
+				per_dev->offset = si->obj_offset - si->unit_off;
+				cur_len = stripe_unit;
+			}
+
+			if (max_comp < comp)
+				max_comp = comp;
+
+			dev += mirrors_p1;
+			dev = (dev % devs_in_group) + first_dev;
+		} else {
+			cur_len = stripe_unit;
+		}
+		if (cur_len >= length)
+			cur_len = length;
+
+		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
+				       cur_len);
+		if (unlikely(ret))
+			goto out;
+
+		comp += mirrors_p1;
+		comp = (comp % devs_in_group) + first_comp;
+
+		length -= cur_len;
+	}
+out:
+	ios->numdevs = max_comp + mirrors_p1;
+	ios->pages_consumed = cur_pg;
+	return ret;
+}
+
+static int _prepare_for_striping(struct exofs_io_state *ios)
+{
+	u64 length = ios->length;
+	struct _striping_info si;
+	unsigned devs_in_group = ios->layout->group_width *
+				 ios->layout->mirrors_p1;
+	unsigned first_comp = 0;
+	int ret = 0;
+
+	_calc_stripe_info(ios, ios->offset, &si);
+
+	if (!ios->pages) {
+		if (ios->kern_buff) {
+			struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
+
+			per_dev->offset = si.obj_offset;
+			per_dev->dev = si.dev;
+
+			/* no cross device without page array */
+			BUG_ON((ios->layout->group_width > 1) &&
+			       (si.unit_off + ios->length >
+				ios->layout->stripe_unit));
+		}
+		ios->numdevs = ios->layout->mirrors_p1;
+		return 0;
+	}
+
+	while (length) {
+		if (length < si.group_length)
+			si.group_length = length;
+
+		ret = _prepare_one_group(ios, si.group_length, &si, first_comp);
+		if (unlikely(ret))
+			goto out;
+
+		length -= si.group_length;
+
+		si.group_length = si.total_group_length;
+		si.unit_off = 0;
+		++si.Major;
+		si.obj_offset = si.Major * ios->layout->stripe_unit *
+						ios->layout->group_depth;
+
+		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
+		si.dev %= ios->layout->s_numdevs;
+
+		first_comp += devs_in_group;
+		first_comp %= ios->layout->s_numdevs;
+	}
+
+out:
+	return ret;
+}
+
 int exofs_sbi_create(struct exofs_io_state *ios)
 {
 	int i, ret;
 
-	for (i = 0; i < ios->sbi->s_numdevs; i++) {
+	for (i = 0; i < ios->layout->s_numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
+		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
 			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
@@ -233,10 +532,10 @@
 {
 	int i, ret;
 
-	for (i = 0; i < ios->sbi->s_numdevs; i++) {
+	for (i = 0; i < ios->layout->s_numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
+		or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
 		if (unlikely(!or)) {
 			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
@@ -253,51 +552,74 @@
 	return ret;
 }
 
-int exofs_sbi_write(struct exofs_io_state *ios)
+static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 {
-	int i, ret;
+	struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
+	unsigned dev = ios->per_dev[cur_comp].dev;
+	unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
+	int ret = 0;
 
-	for (i = 0; i < ios->sbi->s_numdevs; i++) {
+	if (ios->pages && !master_dev->length)
+		return 0; /* Just an empty slot */
+
+	for (; cur_comp < last_comp; ++cur_comp, ++dev) {
+		struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
 		struct osd_request *or;
 
-		or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
+		or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
 		if (unlikely(!or)) {
 			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
 			goto out;
 		}
-		ios->per_dev[i].or = or;
-		ios->numdevs++;
+		per_dev->or = or;
+		per_dev->offset = master_dev->offset;
 
-		if (ios->bio) {
+		if (ios->pages) {
 			struct bio *bio;
 
-			if (i != 0) {
+			if (per_dev != master_dev) {
 				bio = bio_kmalloc(GFP_KERNEL,
-						  ios->bio->bi_max_vecs);
+						  master_dev->bio->bi_max_vecs);
 				if (unlikely(!bio)) {
+					EXOFS_DBGMSG(
+					      "Faild to allocate BIO size=%u\n",
+					      master_dev->bio->bi_max_vecs);
 					ret = -ENOMEM;
 					goto out;
 				}
 
-				__bio_clone(bio, ios->bio);
+				__bio_clone(bio, master_dev->bio);
 				bio->bi_bdev = NULL;
 				bio->bi_next = NULL;
-				ios->per_dev[i].bio =  bio;
+				per_dev->length = master_dev->length;
+				per_dev->bio =  bio;
+				per_dev->dev = dev;
 			} else {
-				bio = ios->bio;
+				bio = master_dev->bio;
+				/* FIXME: bio_set_dir() */
+				bio->bi_rw |= (1 << BIO_RW);
 			}
 
-			osd_req_write(or, &ios->obj, ios->offset, bio,
-				      ios->length);
-/*			EXOFS_DBGMSG("write sync=%d\n", sync);*/
+			osd_req_write(or, &ios->obj, per_dev->offset, bio,
+				      per_dev->length);
+			EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
+				      "length=0x%llx dev=%d\n",
+				     _LLU(ios->obj.id), _LLU(per_dev->offset),
+				     _LLU(per_dev->length), dev);
 		} else if (ios->kern_buff) {
-			osd_req_write_kern(or, &ios->obj, ios->offset,
+			ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
 					   ios->kern_buff, ios->length);
-/*			EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
+			if (unlikely(ret))
+				goto out;
+			EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
+				      "length=0x%llx dev=%d\n",
+				     _LLU(ios->obj.id), _LLU(per_dev->offset),
+				     _LLU(ios->length), dev);
 		} else {
 			osd_req_set_attributes(or, &ios->obj);
-/*			EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
+			EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
+				     _LLU(ios->obj.id), ios->out_attr_len, dev);
 		}
 
 		if (ios->out_attr)
@@ -308,54 +630,93 @@
 			osd_req_add_get_attr_list(or, ios->in_attr,
 						  ios->in_attr_len);
 	}
-	ret = exofs_io_execute(ios);
 
 out:
 	return ret;
 }
 
+int exofs_sbi_write(struct exofs_io_state *ios)
+{
+	int i;
+	int ret;
+
+	ret = _prepare_for_striping(ios);
+	if (unlikely(ret))
+		return ret;
+
+	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+		ret = _sbi_write_mirror(ios, i);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	ret = exofs_io_execute(ios);
+	return ret;
+}
+
+static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
+{
+	struct osd_request *or;
+	struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+	unsigned first_dev = (unsigned)ios->obj.id;
+
+	if (ios->pages && !per_dev->length)
+		return 0; /* Just an empty slot */
+
+	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
+	or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
+	if (unlikely(!or)) {
+		EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+		return -ENOMEM;
+	}
+	per_dev->or = or;
+
+	if (ios->pages) {
+		osd_req_read(or, &ios->obj, per_dev->offset,
+				per_dev->bio, per_dev->length);
+		EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
+			     " dev=%d\n", _LLU(ios->obj.id),
+			     _LLU(per_dev->offset), _LLU(per_dev->length),
+			     first_dev);
+	} else if (ios->kern_buff) {
+		int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
+					    ios->kern_buff, ios->length);
+		EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
+			      "length=0x%llx dev=%d ret=>%d\n",
+			      _LLU(ios->obj.id), _LLU(per_dev->offset),
+			      _LLU(ios->length), first_dev, ret);
+		if (unlikely(ret))
+			return ret;
+	} else {
+		osd_req_get_attributes(or, &ios->obj);
+		EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
+			      _LLU(ios->obj.id), ios->in_attr_len, first_dev);
+	}
+	if (ios->out_attr)
+		osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
+
+	if (ios->in_attr)
+		osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
+
+	return 0;
+}
+
 int exofs_sbi_read(struct exofs_io_state *ios)
 {
-	int i, ret;
+	int i;
+	int ret;
 
-	for (i = 0; i < 1; i++) {
-		struct osd_request *or;
-		unsigned first_dev = (unsigned)ios->obj.id;
+	ret = _prepare_for_striping(ios);
+	if (unlikely(ret))
+		return ret;
 
-		first_dev %= ios->sbi->s_numdevs;
-		or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL);
-		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
-			ret = -ENOMEM;
-			goto out;
-		}
-		ios->per_dev[i].or = or;
-		ios->numdevs++;
-
-		if (ios->bio) {
-			osd_req_read(or, &ios->obj, ios->offset, ios->bio,
-				     ios->length);
-/*			EXOFS_DBGMSG("read sync=%d\n", sync);*/
-		} else if (ios->kern_buff) {
-			osd_req_read_kern(or, &ios->obj, ios->offset,
-					   ios->kern_buff, ios->length);
-/*			EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
-		} else {
-			osd_req_get_attributes(or, &ios->obj);
-/*			EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
-		}
-
-		if (ios->out_attr)
-			osd_req_add_set_attr_list(or, ios->out_attr,
-						  ios->out_attr_len);
-
-		if (ios->in_attr)
-			osd_req_add_get_attr_list(or, ios->in_attr,
-						  ios->in_attr_len);
+	for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
+		ret = _sbi_read_mirror(ios, i);
+		if (unlikely(ret))
+			return ret;
 	}
-	ret = exofs_io_execute(ios);
 
-out:
+	ret = exofs_io_execute(ios);
 	return ret;
 }
 
@@ -380,42 +741,82 @@
 	return -EIO;
 }
 
+static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
+			     struct osd_attr *attr)
+{
+	int last_comp = cur_comp + ios->layout->mirrors_p1;
+
+	for (; cur_comp < last_comp; ++cur_comp) {
+		struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
+		struct osd_request *or;
+
+		or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
+		if (unlikely(!or)) {
+			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
+			return -ENOMEM;
+		}
+		per_dev->or = or;
+
+		osd_req_set_attributes(or, &ios->obj);
+		osd_req_add_set_attr_list(or, attr, 1);
+	}
+
+	return 0;
+}
+
 int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
 {
 	struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
 	struct exofs_io_state *ios;
-	struct osd_attr attr;
-	__be64 newsize;
+	struct exofs_trunc_attr {
+		struct osd_attr attr;
+		__be64 newsize;
+	} *size_attrs;
+	struct _striping_info si;
 	int i, ret;
 
-	if (exofs_get_io_state(sbi, &ios))
-		return -ENOMEM;
+	ret = exofs_get_io_state(&sbi->layout, &ios);
+	if (unlikely(ret))
+		return ret;
+
+	size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
+			     GFP_KERNEL);
+	if (unlikely(!size_attrs)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	ios->obj.id = exofs_oi_objno(oi);
 	ios->cred = oi->i_cred;
 
-	newsize = cpu_to_be64(size);
-	attr = g_attr_logical_length;
-	attr.val_ptr = &newsize;
+	ios->numdevs = ios->layout->s_numdevs;
+	_calc_stripe_info(ios, size, &si);
 
-	for (i = 0; i < sbi->s_numdevs; i++) {
-		struct osd_request *or;
+	for (i = 0; i < ios->layout->group_width; ++i) {
+		struct exofs_trunc_attr *size_attr = &size_attrs[i];
+		u64 obj_size;
 
-		or = osd_start_request(sbi->s_ods[i], GFP_KERNEL);
-		if (unlikely(!or)) {
-			EXOFS_ERR("%s: osd_start_request failed\n", __func__);
-			ret = -ENOMEM;
+		if (i < si.dev)
+			obj_size = si.obj_offset +
+					ios->layout->stripe_unit - si.unit_off;
+		else if (i == si.dev)
+			obj_size = si.obj_offset;
+		else /* i > si.dev */
+			obj_size = si.obj_offset - si.unit_off;
+
+		size_attr->newsize = cpu_to_be64(obj_size);
+		size_attr->attr = g_attr_logical_length;
+		size_attr->attr.val_ptr = &size_attr->newsize;
+
+		ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
+					&size_attr->attr);
+		if (unlikely(ret))
 			goto out;
-		}
-		ios->per_dev[i].or = or;
-		ios->numdevs++;
-
-		osd_req_set_attributes(or, &ios->obj);
-		osd_req_add_set_attr_list(or, &attr, 1);
 	}
 	ret = exofs_io_execute(ios);
 
 out:
+	kfree(size_attrs);
 	exofs_put_io_state(ios);
 	return ret;
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index a1d1e77..6cf5e4e 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -210,7 +210,7 @@
 	sbi = sb->s_fs_info;
 	fscb = &sbi->s_fscb;
 
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (ret)
 		goto out;
 
@@ -264,12 +264,12 @@
 
 void exofs_free_sbi(struct exofs_sb_info *sbi)
 {
-	while (sbi->s_numdevs) {
-		int i = --sbi->s_numdevs;
-		struct osd_dev *od = sbi->s_ods[i];
+	while (sbi->layout.s_numdevs) {
+		int i = --sbi->layout.s_numdevs;
+		struct osd_dev *od = sbi->layout.s_ods[i];
 
 		if (od) {
-			sbi->s_ods[i] = NULL;
+			sbi->layout.s_ods[i] = NULL;
 			osduld_put_device(od);
 		}
 	}
@@ -298,7 +298,8 @@
 				  msecs_to_jiffies(100));
 	}
 
-	_exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid);
+	_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
+			    sbi->layout.s_pid);
 
 	exofs_free_sbi(sbi);
 	sb->s_fs_info = NULL;
@@ -307,6 +308,8 @@
 static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
 				    struct exofs_device_table *dt)
 {
+	u64 stripe_length;
+
 	sbi->data_map.odm_num_comps   =
 				le32_to_cpu(dt->dt_data_map.cb_num_comps);
 	sbi->data_map.odm_stripe_unit =
@@ -320,14 +323,63 @@
 	sbi->data_map.odm_raid_algorithm  =
 				le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
 
-/* FIXME: Hard coded mirror only for now. if not so do not mount */
-	if ((sbi->data_map.odm_num_comps != numdevs) ||
-	    (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) ||
-	    (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) ||
-	    (sbi->data_map.odm_mirror_cnt != (numdevs - 1)))
+/* FIXME: Only raid0 for now. if not so, do not mount */
+	if (sbi->data_map.odm_num_comps != numdevs) {
+		EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
+			  sbi->data_map.odm_num_comps, numdevs);
 		return -EINVAL;
-	else
-		return 0;
+	}
+	if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
+		EXOFS_ERR("Only RAID_0 for now\n");
+		return -EINVAL;
+	}
+	if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
+		EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
+			  numdevs, sbi->data_map.odm_mirror_cnt);
+		return -EINVAL;
+	}
+
+	if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
+		EXOFS_ERR("Stripe Unit(0x%llx)"
+			  " must be Multples of PAGE_SIZE(0x%lx)\n",
+			  _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
+	sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
+
+	if (sbi->data_map.odm_group_width) {
+		sbi->layout.group_width = sbi->data_map.odm_group_width;
+		sbi->layout.group_depth = sbi->data_map.odm_group_depth;
+		if (!sbi->layout.group_depth) {
+			EXOFS_ERR("group_depth == 0 && group_width != 0\n");
+			return -EINVAL;
+		}
+		sbi->layout.group_count = sbi->data_map.odm_num_comps /
+						sbi->layout.mirrors_p1 /
+						sbi->data_map.odm_group_width;
+	} else {
+		if (sbi->data_map.odm_group_depth) {
+			printk(KERN_NOTICE "Warning: group_depth ignored "
+				"group_width == 0 && group_depth == %d\n",
+				sbi->data_map.odm_group_depth);
+			sbi->data_map.odm_group_depth = 0;
+		}
+		sbi->layout.group_width = sbi->data_map.odm_num_comps /
+							sbi->layout.mirrors_p1;
+		sbi->layout.group_depth = -1;
+		sbi->layout.group_count = 1;
+	}
+
+	stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
+	if (stripe_length >= (1ULL << 32)) {
+		EXOFS_ERR("Total Stripe length(0x%llx)"
+			  " >= 32bit is not supported\n", _LLU(stripe_length));
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 /* @odi is valid only as long as @fscb_dev is valid */
@@ -361,7 +413,7 @@
 {
 	struct exofs_sb_info *sbi = *psbi;
 	struct osd_dev *fscb_od;
-	struct osd_obj_id obj = {.partition = sbi->s_pid,
+	struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
 				 .id = EXOFS_DEVTABLE_ID};
 	struct exofs_device_table *dt;
 	unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
@@ -376,9 +428,9 @@
 		return -ENOMEM;
 	}
 
-	fscb_od = sbi->s_ods[0];
-	sbi->s_ods[0] = NULL;
-	sbi->s_numdevs = 0;
+	fscb_od = sbi->layout.s_ods[0];
+	sbi->layout.s_ods[0] = NULL;
+	sbi->layout.s_numdevs = 0;
 	ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
 	if (unlikely(ret)) {
 		EXOFS_ERR("ERROR: reading device table\n");
@@ -397,14 +449,15 @@
 		goto out;
 
 	if (likely(numdevs > 1)) {
-		unsigned size = numdevs * sizeof(sbi->s_ods[0]);
+		unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
 
 		sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
 		if (unlikely(!sbi)) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0]));
+		memset(&sbi->layout.s_ods[1], 0,
+		       size - sizeof(sbi->layout.s_ods[0]));
 		*psbi = sbi;
 	}
 
@@ -427,8 +480,8 @@
 		 * line. We always keep them in device-table order.
 		 */
 		if (fscb_od && osduld_device_same(fscb_od, &odi)) {
-			sbi->s_ods[i] = fscb_od;
-			++sbi->s_numdevs;
+			sbi->layout.s_ods[i] = fscb_od;
+			++sbi->layout.s_numdevs;
 			fscb_od = NULL;
 			continue;
 		}
@@ -441,8 +494,8 @@
 			goto out;
 		}
 
-		sbi->s_ods[i] = od;
-		++sbi->s_numdevs;
+		sbi->layout.s_ods[i] = od;
+		++sbi->layout.s_numdevs;
 
 		/* Read the fscb of the other devices to make sure the FS
 		 * partition is there.
@@ -499,9 +552,15 @@
 		goto free_sbi;
 	}
 
-	sbi->s_ods[0] = od;
-	sbi->s_numdevs = 1;
-	sbi->s_pid = opts->pid;
+	/* Default layout in case we do not have a device-table */
+	sbi->layout.stripe_unit = PAGE_SIZE;
+	sbi->layout.mirrors_p1 = 1;
+	sbi->layout.group_width = 1;
+	sbi->layout.group_depth = -1;
+	sbi->layout.group_count = 1;
+	sbi->layout.s_ods[0] = od;
+	sbi->layout.s_numdevs = 1;
+	sbi->layout.s_pid = opts->pid;
 	sbi->s_timeout = opts->timeout;
 
 	/* fill in some other data by hand */
@@ -514,7 +573,7 @@
 	sb->s_bdev = NULL;
 	sb->s_dev = 0;
 
-	obj.partition = sbi->s_pid;
+	obj.partition = sbi->layout.s_pid;
 	obj.id = EXOFS_SUPER_ID;
 	exofs_make_credential(sbi->s_cred, &obj);
 
@@ -578,13 +637,13 @@
 		goto free_sbi;
 	}
 
-	_exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0],
-			    sbi->s_pid);
+	_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
+			    sbi->layout.s_pid);
 	return 0;
 
 free_sbi:
 	EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
-		  opts->dev_name, sbi->s_pid, ret);
+		  opts->dev_name, sbi->layout.s_pid, ret);
 	exofs_free_sbi(sbi);
 	return ret;
 }
@@ -627,7 +686,7 @@
 	uint8_t cred_a[OSD_CAP_LEN];
 	int ret;
 
-	ret = exofs_get_io_state(sbi, &ios);
+	ret = exofs_get_io_state(&sbi->layout, &ios);
 	if (ret) {
 		EXOFS_DBGMSG("exofs_get_io_state failed.\n");
 		return ret;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 7f8d2e5..1d081f0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -570,7 +570,7 @@
 error_return:
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
-	vfs_dq_free_block(inode, freed);
+	dquot_free_block(inode, freed);
 }
 
 /**
@@ -1236,6 +1236,7 @@
 	unsigned short windowsz = 0;
 	unsigned long ngroups;
 	unsigned long num = *count;
+	int ret;
 
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
@@ -1247,8 +1248,9 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (vfs_dq_alloc_block(inode, num)) {
-		*errp = -EDQUOT;
+	ret = dquot_alloc_block(inode, num);
+	if (ret) {
+		*errp = ret;
 		return 0;
 	}
 
@@ -1409,7 +1411,7 @@
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	vfs_dq_free_block(inode, *count-num);
+	dquot_free_block(inode, *count-num);
 	*count = num;
 	return ret_block;
 
@@ -1420,7 +1422,7 @@
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
-		vfs_dq_free_block(inode, *count);
+		dquot_free_block(inode, *count);
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 061914a..0b038e4 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -118,7 +118,7 @@
 
 /* inode.c */
 extern struct inode *ext2_iget (struct super_block *, unsigned long);
-extern int ext2_write_inode (struct inode *, int);
+extern int ext2_write_inode (struct inode *, struct writeback_control *);
 extern void ext2_delete_inode (struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 586e358..5d198d0 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -20,6 +20,7 @@
 
 #include <linux/time.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -70,7 +71,7 @@
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
 	.mmap		= generic_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 	.splice_read	= generic_file_splice_read,
@@ -87,7 +88,7 @@
 	.compat_ioctl	= ext2_compat_ioctl,
 #endif
 	.mmap		= xip_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 };
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 15387c9..ad7d572 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -121,8 +121,8 @@
 	if (!is_bad_inode(inode)) {
 		/* Quota is already initialized in iput() */
 		ext2_xattr_delete_inode(inode);
-		vfs_dq_free_inode(inode);
-		vfs_dq_drop(inode);
+		dquot_free_inode(inode);
+		dquot_drop(inode);
 	}
 
 	es = EXT2_SB(sb)->s_es;
@@ -586,10 +586,10 @@
 		goto fail_drop;
 	}
 
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	dquot_initialize(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext2_init_acl(inode, dir);
 	if (err)
@@ -605,10 +605,10 @@
 	return inode;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 71b032c..fc13cc1 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,6 +41,8 @@
 MODULE_DESCRIPTION("Second Extended Filesystem");
 MODULE_LICENSE("GPL");
 
+static int __ext2_write_inode(struct inode *inode, int do_sync);
+
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -58,13 +60,15 @@
  */
 void ext2_delete_inode (struct inode * inode)
 {
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
 		goto no_delete;
 	EXT2_I(inode)->i_dtime	= get_seconds();
 	mark_inode_dirty(inode);
-	ext2_write_inode(inode, inode_needs_sync(inode));
+	__ext2_write_inode(inode, inode_needs_sync(inode));
 
 	inode->i_size = 0;
 	if (inode->i_blocks)
@@ -1335,7 +1339,7 @@
 	return ERR_PTR(ret);
 }
 
-int ext2_write_inode(struct inode *inode, int do_sync)
+static int __ext2_write_inode(struct inode *inode, int do_sync)
 {
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1440,6 +1444,11 @@
 	return err;
 }
 
+int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int ext2_sync_inode(struct inode *inode)
 {
 	struct writeback_control wbc = {
@@ -1457,9 +1466,12 @@
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		dquot_initialize(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
-		error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, iattr);
 		if (error)
 			return error;
 	}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index dd7175c..71efb0e 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -99,24 +100,27 @@
  */
 static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
 {
-	struct inode * inode = ext2_new_inode (dir, mode);
-	int err = PTR_ERR(inode);
-	if (!IS_ERR(inode)) {
-		inode->i_op = &ext2_file_inode_operations;
-		if (ext2_use_xip(inode->i_sb)) {
-			inode->i_mapping->a_ops = &ext2_aops_xip;
-			inode->i_fop = &ext2_xip_file_operations;
-		} else if (test_opt(inode->i_sb, NOBH)) {
-			inode->i_mapping->a_ops = &ext2_nobh_aops;
-			inode->i_fop = &ext2_file_operations;
-		} else {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_file_operations;
-		}
-		mark_inode_dirty(inode);
-		err = ext2_add_nondir(dentry, inode);
+	struct inode *inode;
+
+	dquot_initialize(dir);
+
+	inode = ext2_new_inode(dir, mode);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	inode->i_op = &ext2_file_inode_operations;
+	if (ext2_use_xip(inode->i_sb)) {
+		inode->i_mapping->a_ops = &ext2_aops_xip;
+		inode->i_fop = &ext2_xip_file_operations;
+	} else if (test_opt(inode->i_sb, NOBH)) {
+		inode->i_mapping->a_ops = &ext2_nobh_aops;
+		inode->i_fop = &ext2_file_operations;
+	} else {
+		inode->i_mapping->a_ops = &ext2_aops;
+		inode->i_fop = &ext2_file_operations;
 	}
-	return err;
+	mark_inode_dirty(inode);
+	return ext2_add_nondir(dentry, inode);
 }
 
 static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
@@ -127,6 +131,8 @@
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	dquot_initialize(dir);
+
 	inode = ext2_new_inode (dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -151,6 +157,8 @@
 	if (l > sb->s_blocksize)
 		goto out;
 
+	dquot_initialize(dir);
+
 	inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
@@ -194,6 +202,8 @@
 	if (inode->i_nlink >= EXT2_LINK_MAX)
 		return -EMLINK;
 
+	dquot_initialize(dir);
+
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
@@ -216,6 +226,8 @@
 	if (dir->i_nlink >= EXT2_LINK_MAX)
 		goto out;
 
+	dquot_initialize(dir);
+
 	inode_inc_link_count(dir);
 
 	inode = ext2_new_inode (dir, S_IFDIR | mode);
@@ -262,6 +274,8 @@
 	struct page * page;
 	int err = -ENOENT;
 
+	dquot_initialize(dir);
+
 	de = ext2_find_entry (dir, &dentry->d_name, &page);
 	if (!de)
 		goto out;
@@ -304,6 +318,9 @@
 	struct ext2_dir_entry_2 * old_de;
 	int err = -ENOENT;
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f9cb54a..42e4a30 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -194,6 +194,8 @@
 static void ext2_clear_inode(struct inode *inode)
 {
 	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
+
+	dquot_drop(inode);
 	ext2_discard_reservation(inode);
 	EXT2_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 904f006..e44dc92 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -644,8 +644,8 @@
 				   the inode.  */
 				ea_bdebug(new_bh, "reusing block");
 
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1)) {
+				error = dquot_alloc_block(inode, 1);
+				if (error) {
 					unlock_buffer(new_bh);
 					goto cleanup;
 				}
@@ -702,7 +702,7 @@
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
 			if (new_bh && new_bh != old_bh)
-				vfs_dq_free_block(inode, 1);
+				dquot_free_block(inode, 1);
 			goto cleanup;
 		}
 	} else
@@ -734,7 +734,7 @@
 			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
 			if (ce)
 				mb_cache_entry_release(ce);
-			vfs_dq_free_block(inode, 1);
+			dquot_free_block(inode, 1);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -797,7 +797,7 @@
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 	}
 	EXT2_I(inode)->i_file_acl = 0;
 
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 27967f9..161da2d 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -676,7 +676,7 @@
 	}
 	ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
 	if (dquot_freed_blocks)
-		vfs_dq_free_block(inode, dquot_freed_blocks);
+		dquot_free_block(inode, dquot_freed_blocks);
 	return;
 }
 
@@ -1502,8 +1502,9 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (vfs_dq_alloc_block(inode, num)) {
-		*errp = -EDQUOT;
+	err = dquot_alloc_block(inode, num);
+	if (err) {
+		*errp = err;
 		return 0;
 	}
 
@@ -1713,7 +1714,7 @@
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	vfs_dq_free_block(inode, *count-num);
+	dquot_free_block(inode, *count-num);
 	*count = num;
 	return ret_block;
 
@@ -1728,7 +1729,7 @@
 	 * Undo the block allocation
 	 */
 	if (!performed_allocation)
-		vfs_dq_free_block(inode, *count);
+		dquot_free_block(inode, *count);
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 388bbdf..f55df0e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -21,6 +21,7 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
+#include <linux/quotaops.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include "xattr.h"
@@ -33,9 +34,9 @@
  */
 static int ext3_release_file (struct inode * inode, struct file * filp)
 {
-	if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) {
 		filemap_flush(inode->i_mapping);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE;
+		ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 	}
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
@@ -62,7 +63,7 @@
 	.compat_ioctl	= ext3_compat_ioctl,
 #endif
 	.mmap		= generic_file_mmap,
-	.open		= generic_file_open,
+	.open		= dquot_file_open,
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index b399912..ef9008b 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -123,10 +123,10 @@
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	ext3_xattr_delete_inode(handle, inode);
-	vfs_dq_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
 
@@ -588,10 +588,10 @@
 		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
 
 	ret = inode;
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	dquot_initialize(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext3_init_acl(handle, inode, dir);
 	if (err)
@@ -619,10 +619,10 @@
 	return ret;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 455e6e6..7f920b7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -196,6 +196,9 @@
 {
 	handle_t *handle;
 
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -1378,7 +1381,7 @@
 	 */
 	if (pos + len > inode->i_size && ext3_can_truncate(inode))
 		ext3_orphan_add(handle, inode);
-	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+	ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
 		ret2 = ext3_mark_inode_dirty(handle, inode);
@@ -1417,7 +1420,7 @@
 	journal_t *journal;
 	int err;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
@@ -1436,7 +1439,7 @@
 		 * everything they get.
 		 */
 
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;
+		ext3_clear_inode_state(inode, EXT3_STATE_JDATA);
 		journal = EXT3_JOURNAL(inode);
 		journal_lock_updates(journal);
 		err = journal_flush(journal);
@@ -1528,6 +1531,7 @@
 	int err;
 
 	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
 
 	/*
 	 * We give up here if we're reentered, because it might be for a
@@ -1600,6 +1604,9 @@
 	int ret = 0;
 	int err;
 
+	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
+
 	if (ext3_journal_current_handle())
 		goto out_fail;
 
@@ -1642,6 +1649,9 @@
 	int ret = 0;
 	int err;
 
+	J_ASSERT(PageLocked(page));
+	WARN_ON_ONCE(IS_RDONLY(inode));
+
 	if (ext3_journal_current_handle())
 		goto no_write;
 
@@ -1670,7 +1680,7 @@
 				PAGE_CACHE_SIZE, NULL, write_end_fn);
 		if (ret == 0)
 			ret = err;
-		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
+		ext3_set_inode_state(inode, EXT3_STATE_JDATA);
 		unlock_page(page);
 	} else {
 		/*
@@ -1785,8 +1795,9 @@
 		handle = ext3_journal_start(inode, 2);
 		if (IS_ERR(handle)) {
 			/* This is really bad luck. We've written the data
-			 * but cannot extend i_size. Bail out and pretend
-			 * the write failed... */
+			 * but cannot extend i_size. Truncate allocated blocks
+			 * and pretend the write failed... */
+			ext3_truncate(inode);
 			ret = PTR_ERR(handle);
 			goto out;
 		}
@@ -2402,7 +2413,7 @@
 		goto out_notrans;
 
 	if (inode->i_size == 0 && ext3_should_writeback_data(inode))
-		ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE;
+		ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
 
 	/*
 	 * We have to lock the EOF page here, because lock_page() nests
@@ -2721,7 +2732,7 @@
 {
 	/* We have all inode data except xattrs in memory here. */
 	return __ext3_get_inode_loc(inode, iloc,
-		!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR));
+		!ext3_test_inode_state(inode, EXT3_STATE_XATTR));
 }
 
 void ext3_set_inode_flags(struct inode *inode)
@@ -2893,7 +2904,7 @@
 					EXT3_GOOD_OLD_INODE_SIZE +
 					ei->i_extra_isize;
 			if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC))
-				 ei->i_state |= EXT3_STATE_XATTR;
+				 ext3_set_inode_state(inode, EXT3_STATE_XATTR);
 		}
 	} else
 		ei->i_extra_isize = 0;
@@ -2955,7 +2966,7 @@
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
-	if (ei->i_state & EXT3_STATE_NEW)
+	if (ext3_test_inode_state(inode, EXT3_STATE_NEW))
 		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
 
 	ext3_get_inode_flags(ei);
@@ -3052,7 +3063,7 @@
 	rc = ext3_journal_dirty_metadata(handle, bh);
 	if (!err)
 		err = rc;
-	ei->i_state &= ~EXT3_STATE_NEW;
+	ext3_clear_inode_state(inode, EXT3_STATE_NEW);
 
 	atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
 out_brelse:
@@ -3096,7 +3107,7 @@
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext3_write_inode(struct inode *inode, int wait)
+int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (current->flags & PF_MEMALLOC)
 		return 0;
@@ -3107,7 +3118,7 @@
 		return -EIO;
 	}
 
-	if (!wait)
+	if (wbc->sync_mode != WB_SYNC_ALL)
 		return 0;
 
 	return ext3_force_commit(inode->i_sb);
@@ -3140,6 +3151,8 @@
 	if (error)
 		return error;
 
+	if (ia_valid & ATTR_SIZE)
+		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
@@ -3152,7 +3165,7 @@
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error) {
 			ext3_journal_stop(handle);
 			return error;
@@ -3237,7 +3250,7 @@
 		ret = 2 * (bpp + indirects) + 2;
 
 #ifdef CONFIG_QUOTA
-	/* We know that structure was already allocated during vfs_dq_init so
+	/* We know that structure was already allocated during dquot_initialize so
 	 * we will be updating only the data blocks + inodes */
 	ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
 #endif
@@ -3328,7 +3341,7 @@
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, vfs_dq_alloc_space() will always dirty the inode when blocks
+ * Also, dquot_alloc_space() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 7b0e44f..ee18408 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1696,6 +1696,8 @@
 	struct inode * inode;
 	int err, retries = 0;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1730,6 +1732,8 @@
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1766,6 +1770,8 @@
 	if (dir->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -2060,7 +2066,9 @@
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
+
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2119,7 +2127,9 @@
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
+
 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2174,6 +2184,8 @@
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2228,6 +2240,9 @@
 
 	if (inode->i_nlink >= EXT3_LINK_MAX)
 		return -EMLINK;
+
+	dquot_initialize(dir);
+
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 	 * otherwise has the potential to corrupt the orphan inode list.
@@ -2278,12 +2293,15 @@
 	struct ext3_dir_entry_2 * old_de, * new_de;
 	int retval, flush_file = 0;
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	if (new_dentry->d_inode)
-		vfs_dq_init(new_dentry->d_inode);
+		dquot_initialize(new_dentry->d_inode);
 	handle = ext3_journal_start(old_dir, 2 *
 					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index afa2b56..e844acc 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -181,7 +181,7 @@
 	if (!test_opt (sb, ERRORS_CONT)) {
 		journal_t *journal = EXT3_SB(sb)->s_journal;
 
-		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+		set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 		if (journal)
 			journal_abort(journal, -EIO);
 	}
@@ -296,7 +296,7 @@
 		"error: remounting filesystem read-only");
 	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
 	sb->s_flags |= MS_RDONLY;
-	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+	set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
 	if (EXT3_SB(sb)->s_journal)
 		journal_abort(EXT3_SB(sb)->s_journal, -EIO);
 }
@@ -528,6 +528,8 @@
 static void ext3_clear_inode(struct inode *inode)
 {
 	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
+
+	dquot_drop(inode);
 	ext3_discard_reservation(inode);
 	EXT3_I(inode)->i_block_alloc_info = NULL;
 	if (unlikely(rsv))
@@ -562,10 +564,10 @@
 	if (sbi->s_qf_names[GRPQUOTA])
 		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA)
+	if (test_opt(sb, USRQUOTA))
 		seq_puts(seq, ",usrquota");
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)
+	if (test_opt(sb, GRPQUOTA))
 		seq_puts(seq, ",grpquota");
 #endif
 }
@@ -656,8 +658,7 @@
 	if (test_opt(sb, NOBH))
 		seq_puts(seq, ",nobh");
 
-	seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt &
-						     EXT3_MOUNT_DATA_FLAGS));
+	seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
 	if (test_opt(sb, DATA_ERR_ABORT))
 		seq_puts(seq, ",data_err=abort");
 
@@ -751,13 +752,6 @@
 				const char *data, size_t len, loff_t off);
 
 static const struct dquot_operations ext3_quota_operations = {
-	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
-	.free_inode	= dquot_free_inode,
-	.transfer	= dquot_transfer,
 	.write_dquot	= ext3_write_dquot,
 	.acquire_dquot	= ext3_acquire_dquot,
 	.release_dquot	= ext3_release_dquot,
@@ -896,6 +890,63 @@
 	return sb_block;
 }
 
+#ifdef CONFIG_QUOTA
+static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
+{
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+	char *qname;
+
+	if (sb_any_quota_loaded(sb) &&
+		!sbi->s_qf_names[qtype]) {
+		ext3_msg(sb, KERN_ERR,
+			"Cannot change journaled "
+			"quota options when quota turned on");
+		return 0;
+	}
+	qname = match_strdup(args);
+	if (!qname) {
+		ext3_msg(sb, KERN_ERR,
+			"Not enough memory for storing quotafile name");
+		return 0;
+	}
+	if (sbi->s_qf_names[qtype] &&
+		strcmp(sbi->s_qf_names[qtype], qname)) {
+		ext3_msg(sb, KERN_ERR,
+			"%s quota file already specified", QTYPE2NAME(qtype));
+		kfree(qname);
+		return 0;
+	}
+	sbi->s_qf_names[qtype] = qname;
+	if (strchr(sbi->s_qf_names[qtype], '/')) {
+		ext3_msg(sb, KERN_ERR,
+			"quotafile must be on filesystem root");
+		kfree(sbi->s_qf_names[qtype]);
+		sbi->s_qf_names[qtype] = NULL;
+		return 0;
+	}
+	set_opt(sbi->s_mount_opt, QUOTA);
+	return 1;
+}
+
+static int clear_qf_name(struct super_block *sb, int qtype) {
+
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+	if (sb_any_quota_loaded(sb) &&
+		sbi->s_qf_names[qtype]) {
+		ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
+			" when quota turned on");
+		return 0;
+	}
+	/*
+	 * The space will be released later when all options are confirmed
+	 * to be correct
+	 */
+	sbi->s_qf_names[qtype] = NULL;
+	return 1;
+}
+#endif
+
 static int parse_options (char *options, struct super_block *sb,
 			  unsigned int *inum, unsigned long *journal_devnum,
 			  ext3_fsblk_t *n_blocks_count, int is_remount)
@@ -906,8 +957,7 @@
 	int data_opt = 0;
 	int option;
 #ifdef CONFIG_QUOTA
-	int qtype, qfmt;
-	char *qname;
+	int qfmt;
 #endif
 
 	if (!options)
@@ -1065,20 +1115,19 @@
 			data_opt = EXT3_MOUNT_WRITEBACK_DATA;
 		datacheck:
 			if (is_remount) {
-				if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
-						== data_opt)
+				if (test_opt(sb, DATA_FLAGS) == data_opt)
 					break;
 				ext3_msg(sb, KERN_ERR,
 					"error: cannot change "
 					"data mode on remount. The filesystem "
 					"is mounted in data=%s mode and you "
 					"try to remount it in data=%s mode.",
-					data_mode_string(sbi->s_mount_opt &
-							EXT3_MOUNT_DATA_FLAGS),
+					data_mode_string(test_opt(sb,
+							DATA_FLAGS)),
 					data_mode_string(data_opt));
 				return 0;
 			} else {
-				sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS;
+				clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
@@ -1090,62 +1139,20 @@
 			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
-			qtype = USRQUOTA;
-			goto set_qf_name;
+			if (!set_qf_name(sb, USRQUOTA, &args[0]))
+				return 0;
+			break;
 		case Opt_grpjquota:
-			qtype = GRPQUOTA;
-set_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    !sbi->s_qf_names[qtype]) {
-				ext3_msg(sb, KERN_ERR,
-					"error: cannot change journaled "
-					"quota options when quota turned on.");
+			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
 				return 0;
-			}
-			qname = match_strdup(&args[0]);
-			if (!qname) {
-				ext3_msg(sb, KERN_ERR,
-					"error: not enough memory for "
-					"storing quotafile name.");
-				return 0;
-			}
-			if (sbi->s_qf_names[qtype] &&
-			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				ext3_msg(sb, KERN_ERR,
-					"error: %s quota file already "
-					"specified.", QTYPE2NAME(qtype));
-				kfree(qname);
-				return 0;
-			}
-			sbi->s_qf_names[qtype] = qname;
-			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				ext3_msg(sb, KERN_ERR,
-					"error: quotafile must be on "
-					"filesystem root.");
-				kfree(sbi->s_qf_names[qtype]);
-				sbi->s_qf_names[qtype] = NULL;
-				return 0;
-			}
-			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
-			qtype = USRQUOTA;
-			goto clear_qf_name;
-		case Opt_offgrpjquota:
-			qtype = GRPQUOTA;
-clear_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    sbi->s_qf_names[qtype]) {
-				ext3_msg(sb, KERN_ERR, "error: cannot change "
-					"journaled quota options when "
-					"quota turned on.");
+			if (!clear_qf_name(sb, USRQUOTA))
 				return 0;
-			}
-			/*
-			 * The space will be released later when all options
-			 * are confirmed to be correct
-			 */
-			sbi->s_qf_names[qtype] = NULL;
+			break;
+		case Opt_offgrpjquota:
+			if (!clear_qf_name(sb, GRPQUOTA))
+				return 0;
 			break;
 		case Opt_jqfmt_vfsold:
 			qfmt = QFMT_VFS_OLD;
@@ -1244,18 +1251,12 @@
 	}
 #ifdef CONFIG_QUOTA
 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
-		if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) &&
-		     sbi->s_qf_names[USRQUOTA])
+		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
 			clear_opt(sbi->s_mount_opt, USRQUOTA);
-
-		if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) &&
-		     sbi->s_qf_names[GRPQUOTA])
+		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
 
-		if ((sbi->s_qf_names[USRQUOTA] &&
-				(sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
-		    (sbi->s_qf_names[GRPQUOTA] &&
-				(sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
+		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
 			ext3_msg(sb, KERN_ERR, "error: old and new quota "
 					"format mixing.");
 			return 0;
@@ -1478,7 +1479,7 @@
 		}
 
 		list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		if (inode->i_nlink) {
 			printk(KERN_DEBUG
 				"%s: truncating inode %lu to %Ld bytes\n",
@@ -1671,11 +1672,11 @@
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
 #endif
 	if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
-		sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
+		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
-		sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA;
+		set_opt(sbi->s_mount_opt, ORDERED_DATA);
 	else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
-		sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA;
+		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 
 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -1694,7 +1695,7 @@
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
 	    (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2561,11 +2562,11 @@
 		goto restore_opts;
 	}
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+	if (test_opt(sb, ABORT))
 		ext3_abort(sb, __func__, "Abort forced by user");
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
 
@@ -2573,7 +2574,7 @@
 
 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
 		n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
-		if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) {
+		if (test_opt(sb, ABORT)) {
 			err = -EROFS;
 			goto restore_opts;
 		}
@@ -2734,7 +2735,7 @@
  * Process 1                         Process 2
  * ext3_create()                     quota_sync()
  *   journal_start()                   write_dquot()
- *   vfs_dq_init()                       down(dqio_mutex)
+ *   dquot_initialize()                       down(dqio_mutex)
  *     down(dqio_mutex)                    journal_start()
  *
  */
@@ -2942,9 +2943,7 @@
 	sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
 	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
-	int tocopy;
 	int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
-	size_t towrite = len;
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
@@ -2955,53 +2954,54 @@
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
-	while (towrite > 0) {
-		tocopy = sb->s_blocksize - offset < towrite ?
-				sb->s_blocksize - offset : towrite;
-		bh = ext3_bread(handle, inode, blk, 1, &err);
-		if (!bh)
-			goto out;
-		if (journal_quota) {
-			err = ext3_journal_get_write_access(handle, bh);
-			if (err) {
-				brelse(bh);
-				goto out;
-			}
-		}
-		lock_buffer(bh);
-		memcpy(bh->b_data+offset, data, tocopy);
-		flush_dcache_page(bh->b_page);
-		unlock_buffer(bh);
-		if (journal_quota)
-			err = ext3_journal_dirty_metadata(handle, bh);
-		else {
-			/* Always do at least ordered writes for quotas */
-			err = ext3_journal_dirty_data(handle, bh);
-			mark_buffer_dirty(bh);
-		}
-		brelse(bh);
-		if (err)
-			goto out;
-		offset = 0;
-		towrite -= tocopy;
-		data += tocopy;
-		blk++;
+
+	/*
+	 * Since we account only one data block in transaction credits,
+	 * then it is impossible to cross a block boundary.
+	 */
+	if (sb->s_blocksize - offset < len) {
+		ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because not block aligned",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
 	}
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
+	bh = ext3_bread(handle, inode, blk, 1, &err);
+	if (!bh)
+		goto out;
+	if (journal_quota) {
+		err = ext3_journal_get_write_access(handle, bh);
+		if (err) {
+			brelse(bh);
+			goto out;
+		}
+	}
+	lock_buffer(bh);
+	memcpy(bh->b_data+offset, data, len);
+	flush_dcache_page(bh->b_page);
+	unlock_buffer(bh);
+	if (journal_quota)
+		err = ext3_journal_dirty_metadata(handle, bh);
+	else {
+		/* Always do at least ordered writes for quotas */
+		err = ext3_journal_dirty_data(handle, bh);
+		mark_buffer_dirty(bh);
+	}
+	brelse(bh);
 out:
-	if (len == towrite) {
+	if (err) {
 		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
-	if (inode->i_size < off+len-towrite) {
-		i_size_write(inode, off+len-towrite);
+	if (inode->i_size < off + len) {
+		i_size_write(inode, off + len);
 		EXT3_I(inode)->i_disksize = inode->i_size;
 	}
 	inode->i_version++;
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	ext3_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
-	return len - towrite;
+	return len;
 }
 
 #endif
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 66895cc..534a94c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -274,7 +274,7 @@
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
 		return -ENODATA;
 	error = ext3_get_inode_loc(inode, &iloc);
 	if (error)
@@ -403,7 +403,7 @@
 	void *end;
 	int error;
 
-	if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+	if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR))
 		return 0;
 	error = ext3_get_inode_loc(inode, &iloc);
 	if (error)
@@ -500,7 +500,7 @@
 		error = ext3_journal_dirty_metadata(handle, bh);
 		if (IS_SYNC(inode))
 			handle->h_sync = 1;
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -775,8 +775,8 @@
 			else {
 				/* The old block is released after updating
 				   the inode. */
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1))
+				error = dquot_alloc_block(inode, 1);
+				if (error)
 					goto cleanup;
 				error = ext3_journal_get_write_access(handle,
 								      new_bh);
@@ -850,7 +850,7 @@
 	return error;
 
 cleanup_dquot:
-	vfs_dq_free_block(inode, 1);
+	dquot_free_block(inode, 1);
 	goto cleanup;
 
 bad_block:
@@ -882,7 +882,7 @@
 	is->s.base = is->s.first = IFIRST(header);
 	is->s.here = is->s.first;
 	is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
-	if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) {
 		error = ext3_xattr_check_names(IFIRST(header), is->s.end);
 		if (error)
 			return error;
@@ -914,10 +914,10 @@
 	header = IHDR(inode, ext3_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
 		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-		EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+		ext3_set_inode_state(inode, EXT3_STATE_XATTR);
 	} else {
 		header->h_magic = cpu_to_le32(0);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+		ext3_clear_inode_state(inode, EXT3_STATE_XATTR);
 	}
 	return 0;
 }
@@ -967,10 +967,10 @@
 	if (error)
 		goto cleanup;
 
-	if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
+	if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) {
 		struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
 		memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
-		EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
+		ext3_clear_inode_state(inode, EXT3_STATE_NEW);
 	}
 
 	error = ext3_xattr_ibody_find(inode, &i, &is);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 22bc743..d2f37a5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -97,8 +97,8 @@
 		/* If checksum is bad mark all blocks used to prevent allocation
 		 * essentially implementing a per-group read-only flag. */
 		if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-			ext4_error(sb, __func__,
-				  "Checksum bad for group %u", block_group);
+			ext4_error(sb, "Checksum bad for group %u",
+					block_group);
 			ext4_free_blks_set(sb, gdp, 0);
 			ext4_free_inodes_set(sb, gdp, 0);
 			ext4_itable_unused_set(sb, gdp, 0);
@@ -130,8 +130,7 @@
 		 * to make sure we calculate the right free blocks
 		 */
 		group_blocks = ext4_blocks_count(sbi->s_es) -
-			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
+			ext4_group_first_block_no(sb, ngroups - 1);
 	} else {
 		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
 	}
@@ -189,9 +188,6 @@
  * when a file system is mounted (see ext4_fill_super).
  */
 
-
-#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
-
 /**
  * ext4_get_group_desc() -- load group descriptor from disk
  * @sb:			super block
@@ -210,10 +206,8 @@
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (block_group >= ngroups) {
-		ext4_error(sb, "ext4_get_group_desc",
-			   "block_group >= groups_count - "
-			   "block_group = %u, groups_count = %u",
-			   block_group, ngroups);
+		ext4_error(sb, "block_group >= groups_count - block_group = %u,"
+			   " groups_count = %u", block_group, ngroups);
 
 		return NULL;
 	}
@@ -221,8 +215,7 @@
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 	if (!sbi->s_group_desc[group_desc]) {
-		ext4_error(sb, "ext4_get_group_desc",
-			   "Group descriptor not loaded - "
+		ext4_error(sb, "Group descriptor not loaded - "
 			   "block_group = %u, group_desc = %u, desc = %u",
 			   block_group, group_desc, offset);
 		return NULL;
@@ -282,9 +275,7 @@
 		return 1;
 
 err_out:
-	ext4_error(sb, __func__,
-			"Invalid block bitmap - "
-			"block_group = %d, block = %llu",
+	ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
 			block_group, bitmap_blk);
 	return 0;
 }
@@ -311,8 +302,7 @@
 	bitmap_blk = ext4_block_bitmap(sb, desc);
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
-		ext4_error(sb, __func__,
-			    "Cannot read block bitmap - "
+		ext4_error(sb, "Cannot read block bitmap - "
 			    "block_group = %u, block_bitmap = %llu",
 			    block_group, bitmap_blk);
 		return NULL;
@@ -354,8 +344,7 @@
 	set_bitmap_uptodate(bh);
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
-		ext4_error(sb, __func__,
-			    "Cannot read block bitmap - "
+		ext4_error(sb, "Cannot read block bitmap - "
 			    "block_group = %u, block_bitmap = %llu",
 			    block_group, bitmap_blk);
 		return NULL;
@@ -419,8 +408,7 @@
 	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
 	    in_range(block + count - 1, ext4_inode_table(sb, desc),
 		     sbi->s_itb_per_group)) {
-		ext4_error(sb, __func__,
-			   "Adding blocks in system zones - "
+		ext4_error(sb, "Adding blocks in system zones - "
 			   "Block = %llu, count = %lu",
 			   block, count);
 		goto error_return;
@@ -453,8 +441,7 @@
 		BUFFER_TRACE(bitmap_bh, "clear bit");
 		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 						bit + i, bitmap_bh->b_data)) {
-			ext4_error(sb, __func__,
-				   "bit already cleared for block %llu",
+			ext4_error(sb, "bit already cleared for block %llu",
 				   (ext4_fsblk_t)(block + i));
 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
 		} else {
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index a60ab9a..983f0e1 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -205,14 +205,14 @@
 		entry = rb_entry(n, struct ext4_system_zone, node);
 		kmem_cache_free(ext4_system_zone_cachep, entry);
 		if (!parent)
-			EXT4_SB(sb)->system_blks.rb_node = NULL;
+			EXT4_SB(sb)->system_blks = RB_ROOT;
 		else if (parent->rb_left == n)
 			parent->rb_left = NULL;
 		else if (parent->rb_right == n)
 			parent->rb_right = NULL;
 		n = parent;
 	}
-	EXT4_SB(sb)->system_blks.rb_node = NULL;
+	EXT4_SB(sb)->system_blks = RB_ROOT;
 }
 
 /*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9dc93168..86cb6d8 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -83,10 +83,12 @@
 		error_msg = "inode out of bounds";
 
 	if (error_msg != NULL)
-		ext4_error(dir->i_sb, function,
-			"bad entry in directory #%lu: %s - "
-			"offset=%u, inode=%u, rec_len=%d, name_len=%d",
-			dir->i_ino, error_msg, offset,
+		__ext4_error(dir->i_sb, function,
+			"bad entry in directory #%lu: %s - block=%llu"
+			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
+			dir->i_ino, error_msg, 
+			(unsigned long long) bh->b_blocknr,     
+			(unsigned) (offset%bh->b_size), offset,
 			le32_to_cpu(de->inode),
 			rlen, de->name_len);
 	return error_msg == NULL ? 1 : 0;
@@ -150,7 +152,7 @@
 		 */
 		if (!bh) {
 			if (!dir_has_error) {
-				ext4_error(sb, __func__, "directory #%lu "
+				ext4_error(sb, "directory #%lu "
 					   "contains a hole at offset %Lu",
 					   inode->i_ino,
 					   (unsigned long long) filp->f_pos);
@@ -303,7 +305,7 @@
 			kfree(old);
 		}
 		if (!parent)
-			root->rb_node = NULL;
+			*root = RB_ROOT;
 		else if (parent->rb_left == n)
 			parent->rb_left = NULL;
 		else if (parent->rb_right == n)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4cedc91..bf938cf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -53,6 +53,12 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+#define EXT4_ERROR_INODE(inode, fmt, a...) \
+	ext4_error_inode(__func__, (inode), (fmt), ## a);
+
+#define EXT4_ERROR_FILE(file, fmt, a...)	\
+	ext4_error_file(__func__, (file), (fmt), ## a);
+
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
 
@@ -133,14 +139,14 @@
 	int pages_written;
 	int retval;
 };
-#define	DIO_AIO_UNWRITTEN	0x1
+#define	EXT4_IO_UNWRITTEN	0x1
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished AIO list */
 	struct inode		*inode;		/* file being written to */
 	unsigned int		flag;		/* unwritten or not */
-	int			error;		/* I/O error code */
-	ext4_lblk_t		offset;		/* offset in the file */
-	size_t			size;		/* size of the extent */
+	struct page		*page;		/* page struct for buffer write */
+	loff_t			offset;		/* offset in the file */
+	ssize_t			size;		/* size of the extent */
 	struct work_struct	work;		/* data work queue */
 } ext4_io_end_t;
 
@@ -284,10 +290,12 @@
 #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT4_HUGE_FILE_FL               0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
+#define EXT4_EA_INODE_FL	        0x00200000 /* Inode used for large EA */
+#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE		0x000B80FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE		0x004B80FF /* User modifiable flags */
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
@@ -313,17 +321,6 @@
 		return flags & EXT4_OTHER_FLMASK;
 }
 
-/*
- * Inode dynamic state flags
- */
-#define EXT4_STATE_JDATA		0x00000001 /* journaled data exists */
-#define EXT4_STATE_NEW			0x00000002 /* inode is newly created */
-#define EXT4_STATE_XATTR		0x00000004 /* has in-inode xattrs */
-#define EXT4_STATE_NO_EXPAND		0x00000008 /* No space for expansion */
-#define EXT4_STATE_DA_ALLOC_CLOSE	0x00000010 /* Alloc DA blks on close */
-#define EXT4_STATE_EXT_MIGRATE		0x00000020 /* Inode is migrating */
-#define EXT4_STATE_DIO_UNWRITTEN	0x00000040 /* need convert on dio done*/
-
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
 	__u32 group;		/* Group number for this data */
@@ -364,19 +361,20 @@
 	/* caller is from the direct IO path, request to creation of an
 	unitialized extents if not allocated, split the uninitialized
 	extent if blocks has been preallocated already*/
-#define EXT4_GET_BLOCKS_DIO			0x0008
+#define EXT4_GET_BLOCKS_PRE_IO			0x0008
 #define EXT4_GET_BLOCKS_CONVERT			0x0010
-#define EXT4_GET_BLOCKS_DIO_CREATE_EXT		(EXT4_GET_BLOCKS_DIO|\
+#define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
 					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
-	/* Convert extent to initialized after direct IO complete */
-#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
-					 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	/* Convert extent to initialized after IO complete */
+#define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
+					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
 
 /*
  * Flags used by ext4_free_blocks
  */
 #define EXT4_FREE_BLOCKS_METADATA	0x0001
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
+#define EXT4_FREE_BLOCKS_VALIDATED	0x0004
 
 /*
  * ioctl commands
@@ -630,7 +628,7 @@
 	 * near to their parent directory's inode.
 	 */
 	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
+	unsigned long	i_state_flags;		/* Dynamic state flags */
 
 	ext4_lblk_t		i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -708,8 +706,9 @@
 	qsize_t i_reserved_quota;
 #endif
 
-	/* completed async DIOs that might need unwritten extents handling */
-	struct list_head i_aio_dio_complete_list;
+	/* completed IOs that might need unwritten extents handling */
+	struct list_head i_completed_io_list;
+	spinlock_t i_completed_io_lock;
 	/* current io_end structure for async DIO write*/
 	ext4_io_end_t *cur_aio_dio;
 
@@ -760,6 +759,7 @@
 #define EXT4_MOUNT_QUOTA		0x80000 /* Some quota option set */
 #define EXT4_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT4_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
+#define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
@@ -1050,6 +1050,34 @@
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
+
+/*
+ * Inode dynamic state flags
+ */
+enum {
+	EXT4_STATE_JDATA,		/* journaled data exists */
+	EXT4_STATE_NEW,			/* inode is newly created */
+	EXT4_STATE_XATTR,		/* has in-inode xattrs */
+	EXT4_STATE_NO_EXPAND,		/* No space for expansion */
+	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
+	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
+	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
+};
+
+static inline int ext4_test_inode_state(struct inode *inode, int bit)
+{
+	return test_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
+
+static inline void ext4_set_inode_state(struct inode *inode, int bit)
+{
+	set_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
+
+static inline void ext4_clear_inode_state(struct inode *inode, int bit)
+{
+	clear_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -1126,6 +1154,8 @@
 #define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
 #define EXT4_FEATURE_INCOMPAT_MMP               0x0100
 #define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
+#define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
+#define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
 
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1416,7 +1446,7 @@
 				struct buffer_head *bh_result, int create);
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern int  ext4_write_inode(struct inode *, int);
+extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 				struct kstat *stat);
@@ -1439,7 +1469,7 @@
 		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern int flush_completed_IO(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
 /* ioctl.c */
@@ -1465,13 +1495,20 @@
 				ext4_fsblk_t n_blocks_count);
 
 /* super.c */
-extern void ext4_error(struct super_block *, const char *, const char *, ...)
+extern void __ext4_error(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
+#define ext4_error(sb, message...)	__ext4_error(sb, __func__, ## message)
+extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_error_file(const char *, struct file *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void __ext4_std_error(struct super_block *, const char *, int);
 extern void ext4_abort(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
-extern void ext4_warning(struct super_block *, const char *, const char *, ...)
+extern void __ext4_warning(struct super_block *, const char *,
+			  const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
+#define ext4_warning(sb, message...)	__ext4_warning(sb, __func__, ## message)
 extern void ext4_msg(struct super_block *, const char *, const char *, ...)
 	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
@@ -1744,7 +1781,7 @@
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
 			  loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-			  loff_t len);
+			  ssize_t len);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
 			   sector_t block, unsigned int max_blocks,
 			   struct buffer_head *bh, int flags);
@@ -1756,6 +1793,15 @@
 			     __u64 len, __u64 *moved_len);
 
 
+/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
+enum ext4_state_bits {
+	BH_Uninit	/* blocks are allocated but uninitialized on disk */
+	  = BH_JBDPrivateStart,
+};
+
+BUFFER_FNS(Uninit, uninit)
+TAS_BUFFER_FNS(Uninit, uninit)
+
 /*
  * Add new method to test wether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
@@ -1773,6 +1819,8 @@
 	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }
 
+#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b57e5c7..53d2764 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -125,14 +125,14 @@
 			ext4_journal_abort_handle(where, __func__, bh,
 						  handle, err);
 	} else {
-		if (inode && bh)
+		if (inode)
 			mark_buffer_dirty_inode(bh, inode);
 		else
 			mark_buffer_dirty(bh);
 		if (inode && inode_needs_sync(inode)) {
 			sync_dirty_buffer(bh);
 			if (buffer_req(bh) && !buffer_uptodate(bh)) {
-				ext4_error(inode->i_sb, __func__,
+				ext4_error(inode->i_sb,
 					   "IO error syncing inode, "
 					   "inode=%lu, block=%llu",
 					   inode->i_ino,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 05eca81..b79ad51 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -304,4 +304,28 @@
 	return 0;
 }
 
+/*
+ * This function controls whether or not we should try to go down the
+ * dioread_nolock code paths, which makes it safe to avoid taking
+ * i_mutex for direct I/O reads.  This only works for extent-based
+ * files, and it doesn't work for nobh or if data journaling is
+ * enabled, since the dioread_nolock code uses b_private to pass
+ * information back to the I/O completion handler, and this conflicts
+ * with the jbd's use of b_private.
+ */
+static inline int ext4_should_dioread_nolock(struct inode *inode)
+{
+	if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
+		return 0;
+	if (test_opt(inode->i_sb, NOBH))
+		return 0;
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		return 0;
+	if (ext4_should_journal_data(inode))
+		return 0;
+	return 1;
+}
+
 #endif	/* _EXT4_JBD2_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 765a482..94c8ee8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@
 		if (S_ISREG(inode->i_mode))
 			block_group++;
 	}
-	bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
-		le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
 	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
 	/*
@@ -440,7 +439,7 @@
 	return 0;
 
 corrupted:
-	ext4_error(inode->i_sb, function,
+	__ext4_error(inode->i_sb, function,
 			"bad header/extent in inode #%lu: %s - magic %x, "
 			"entries %u, max %u(%u), depth %u(%u)",
 			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -703,7 +702,12 @@
 		}
 		eh = ext_block_hdr(bh);
 		ppos++;
-		BUG_ON(ppos > depth);
+		if (unlikely(ppos > depth)) {
+			put_bh(bh);
+			EXT4_ERROR_INODE(inode,
+					 "ppos %d > depth %d", ppos, depth);
+			goto err;
+		}
 		path[ppos].p_bh = bh;
 		path[ppos].p_hdr = eh;
 		i--;
@@ -749,7 +753,12 @@
 	if (err)
 		return err;
 
-	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
 	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
 	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
 		/* insert after */
@@ -779,9 +788,17 @@
 	ext4_idx_store_pblock(ix, ptr);
 	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
-	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
-			     > le16_to_cpu(curp->p_hdr->eh_max));
-	BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+			     > le16_to_cpu(curp->p_hdr->eh_max))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
+	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
+		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
+		return -EIO;
+	}
 
 	err = ext4_ext_dirty(handle, inode, curp);
 	ext4_std_error(inode->i_sb, err);
@@ -819,7 +836,10 @@
 
 	/* if current leaf will be split, then we should use
 	 * border from split point */
-	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
+		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
+		return -EIO;
+	}
 	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		border = path[depth].p_ext[1].ee_block;
 		ext_debug("leaf will be split."
@@ -860,7 +880,11 @@
 
 	/* initialize new leaf */
 	newblock = ablocks[--a];
-	BUG_ON(newblock == 0);
+	if (unlikely(newblock == 0)) {
+		EXT4_ERROR_INODE(inode, "newblock == 0!");
+		err = -EIO;
+		goto cleanup;
+	}
 	bh = sb_getblk(inode->i_sb, newblock);
 	if (!bh) {
 		err = -EIO;
@@ -880,7 +904,14 @@
 	ex = EXT_FIRST_EXTENT(neh);
 
 	/* move remainder of path[depth] to the new leaf */
-	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+	if (unlikely(path[depth].p_hdr->eh_entries !=
+		     path[depth].p_hdr->eh_max)) {
+		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
+				 path[depth].p_hdr->eh_entries,
+				 path[depth].p_hdr->eh_max);
+		err = -EIO;
+		goto cleanup;
+	}
 	/* start copy from next extent */
 	/* TODO: we could do it by single memmove */
 	m = 0;
@@ -927,7 +958,11 @@
 
 	/* create intermediate indexes */
 	k = depth - at - 1;
-	BUG_ON(k < 0);
+	if (unlikely(k < 0)) {
+		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
+		err = -EIO;
+		goto cleanup;
+	}
 	if (k)
 		ext_debug("create %d intermediate indices\n", k);
 	/* insert new index into current index block */
@@ -964,8 +999,14 @@
 
 		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
 				EXT_MAX_INDEX(path[i].p_hdr));
-		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
-				EXT_LAST_INDEX(path[i].p_hdr));
+		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
+					EXT_LAST_INDEX(path[i].p_hdr))) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+					 le32_to_cpu(path[i].p_ext->ee_block));
+			err = -EIO;
+			goto cleanup;
+		}
 		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
 			ext_debug("%d: move %d:%llu in new index %llu\n", i,
 					le32_to_cpu(path[i].p_idx->ei_block),
@@ -1203,7 +1244,10 @@
 	struct ext4_extent *ex;
 	int depth, ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
 	depth = path->p_depth;
 	*phys = 0;
 
@@ -1217,15 +1261,33 @@
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_len(ex);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
+					 *logical, le32_to_cpu(ex->ee_block));
+			return -EIO;
+		}
 		while (--depth >= 0) {
 			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
+				  ix != NULL ? ix->ei_block : 0,
+				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
+				    EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+				  depth);
+				return -EIO;
+			}
 		}
 		return 0;
 	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
 	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
 	*phys = ext_pblock(ex) + ee_len - 1;
@@ -1251,7 +1313,10 @@
 	int depth;	/* Note, NOT eh_depth; depth from top of tree */
 	int ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
 	depth = path->p_depth;
 	*phys = 0;
 
@@ -1265,17 +1330,32 @@
 	ex = path[depth].p_ext;
 	ee_len = ext4_ext_get_actual_len(ex);
 	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "first_extent(path[%d].p_hdr) != ex",
+					 depth);
+			return -EIO;
+		}
 		while (--depth >= 0) {
 			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+						 "ix != EXT_FIRST_INDEX *logical %d!",
+						 *logical);
+				return -EIO;
+			}
 		}
 		*logical = le32_to_cpu(ex->ee_block);
 		*phys = ext_pblock(ex);
 		return 0;
 	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
 	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
 		/* next allocated block in this leaf */
@@ -1414,8 +1494,12 @@
 
 	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
-	BUG_ON(ex == NULL);
-	BUG_ON(eh == NULL);
+
+	if (unlikely(ex == NULL || eh == NULL)) {
+		EXT4_ERROR_INODE(inode,
+				 "ex %p == NULL or eh %p == NULL", ex, eh);
+		return -EIO;
+	}
 
 	if (depth == 0) {
 		/* there is no tree at all */
@@ -1538,8 +1622,9 @@
 		merge_done = 1;
 		WARN_ON(eh->eh_entries == 0);
 		if (!eh->eh_entries)
-			ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
-			   "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+			ext4_error(inode->i_sb,
+				   "inode#%lu, eh->eh_entries = 0!",
+				   inode->i_ino);
 	}
 
 	return merge_done;
@@ -1612,13 +1697,19 @@
 	ext4_lblk_t next;
 	unsigned uninitialized = 0;
 
-	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
+	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+		return -EIO;
+	}
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
-	BUG_ON(path[depth].p_hdr == NULL);
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
 
 	/* try to insert block into found extent and return */
-	if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
 		&& ext4_can_extents_be_merged(inode, ex, newext)) {
 		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
 				ext4_ext_is_uninitialized(newext),
@@ -1739,7 +1830,7 @@
 
 merge:
 	/* try to merge extents to the right */
-	if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
 		ext4_ext_try_to_merge(inode, path, nearex);
 
 	/* try to merge extents to the left */
@@ -1787,7 +1878,11 @@
 		}
 
 		depth = ext_depth(inode);
-		BUG_ON(path[depth].p_hdr == NULL);
+		if (unlikely(path[depth].p_hdr == NULL)) {
+			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+			err = -EIO;
+			break;
+		}
 		ex = path[depth].p_ext;
 		next = ext4_ext_next_allocated_block(path);
 
@@ -1838,7 +1933,11 @@
 			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
 
-		BUG_ON(cbex.ec_len == 0);
+		if (unlikely(cbex.ec_len == 0)) {
+			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+			err = -EIO;
+			break;
+		}
 		err = func(inode, path, &cbex, ex, cbdata);
 		ext4_ext_drop_refs(path);
 
@@ -1952,7 +2051,7 @@
 
 	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
 			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
-	if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+	if (in_range(block, cex->ec_block, cex->ec_len)) {
 		ex->ee_block = cpu_to_le32(cex->ec_block);
 		ext4_ext_store_pblock(ex, cex->ec_start);
 		ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1981,7 +2080,10 @@
 	/* free index block */
 	path--;
 	leaf = idx_pblock(path->p_idx);
-	BUG_ON(path->p_hdr->eh_entries == 0);
+	if (unlikely(path->p_hdr->eh_entries == 0)) {
+		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+		return -EIO;
+	}
 	err = ext4_ext_get_access(handle, inode, path);
 	if (err)
 		return err;
@@ -2119,8 +2221,10 @@
 	if (!path[depth].p_hdr)
 		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
 	eh = path[depth].p_hdr;
-	BUG_ON(eh == NULL);
-
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
 	/* find where to start removing */
 	ex = EXT_LAST_EXTENT(eh);
 
@@ -2983,7 +3087,7 @@
 	ext4_ext_dirty(handle, inode, path + depth);
 	return err;
 }
-static int ext4_convert_unwritten_extents_dio(handle_t *handle,
+static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 					      struct inode *inode,
 					      struct ext4_ext_path *path)
 {
@@ -3063,8 +3167,8 @@
 		  flags, allocated);
 	ext4_ext_show_leaf(inode, path);
 
-	/* DIO get_block() before submit the IO, split the extent */
-	if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+	/* get_block() before submit the IO, split the extent */
+	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle,
 						inode, path, iblock,
 						max_blocks, flags);
@@ -3074,14 +3178,16 @@
 		 * completed
 		 */
 		if (io)
-			io->flag = DIO_AIO_UNWRITTEN;
+			io->flag = EXT4_IO_UNWRITTEN;
 		else
-			EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
+			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
 		goto out;
 	}
-	/* async DIO end_io complete, convert the filled extent to written */
-	if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
-		ret = ext4_convert_unwritten_extents_dio(handle, inode,
+	/* IO end_io complete, convert the filled extent to written */
+	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+		ret = ext4_convert_unwritten_extents_endio(handle, inode,
 							path);
 		if (ret >= 0)
 			ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3185,7 +3291,7 @@
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *last_ex;
 	ext4_fsblk_t newblock;
 	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
@@ -3237,10 +3343,10 @@
 	 * this situation is possible, though, _during_ tree modification;
 	 * this is why assert can't be put in ext4_ext_find_extent()
 	 */
-	if (path[depth].p_ext == NULL && depth != 0) {
-		ext4_error(inode->i_sb, __func__, "bad extent address "
-			   "inode: %lu, iblock: %d, depth: %d",
-			   inode->i_ino, iblock, depth);
+	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+		EXT4_ERROR_INODE(inode, "bad extent address "
+				 "iblock: %d, depth: %d pblock %lld",
+				 iblock, depth, path[depth].p_block);
 		err = -EIO;
 		goto out2;
 	}
@@ -3258,7 +3364,7 @@
 		 */
 		ee_len = ext4_ext_get_actual_len(ex);
 		/* if found extent covers block, simply return it */
-		if (iblock >= ee_block && iblock < ee_block + ee_len) {
+		if (in_range(iblock, ee_block, ee_len)) {
 			newblock = iblock - ee_block + ee_start;
 			/* number of remaining blocks in the extent */
 			allocated = ee_len - (iblock - ee_block);
@@ -3350,21 +3456,35 @@
 	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
 		ext4_ext_mark_uninitialized(&newex);
 		/*
-		 * io_end structure was created for every async
-		 * direct IO write to the middle of the file.
-		 * To avoid unecessary convertion for every aio dio rewrite
-		 * to the mid of file, here we flag the IO that is really
-		 * need the convertion.
+		 * io_end structure was created for every IO write to an
+		 * uninitialized extent. To avoid unecessary conversion,
+		 * here we flag the IO that really needs the conversion.
 		 * For non asycn direct IO case, flag the inode state
 		 * that we need to perform convertion when IO is done.
 		 */
-		if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 			if (io)
-				io->flag = DIO_AIO_UNWRITTEN;
+				io->flag = EXT4_IO_UNWRITTEN;
 			else
-				EXT4_I(inode)->i_state |=
-					EXT4_STATE_DIO_UNWRITTEN;;
+				ext4_set_inode_state(inode,
+						     EXT4_STATE_DIO_UNWRITTEN);
 		}
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
+	}
+
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+		if (unlikely(!eh->eh_entries)) {
+			EXT4_ERROR_INODE(inode,
+					 "eh->eh_entries == 0 ee_block %d",
+					 ex->ee_block);
+			err = -EIO;
+			goto out2;
+		}
+		last_ex = EXT_LAST_EXTENT(eh);
+		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+		    + ext4_ext_get_actual_len(last_ex))
+			EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
 	}
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
@@ -3499,6 +3619,13 @@
 			i_size_write(inode, new_size);
 		if (new_size > EXT4_I(inode)->i_disksize)
 			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if (new_size > i_size_read(inode))
+			EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
 	}
 
 }
@@ -3603,7 +3730,7 @@
  * Returns 0 on success.
  */
 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-				    loff_t len)
+				    ssize_t len)
 {
 	handle_t *handle;
 	ext4_lblk_t block;
@@ -3635,7 +3762,7 @@
 		map_bh.b_state = 0;
 		ret = ext4_get_blocks(handle, inode, block,
 				      max_blocks, &map_bh,
-				      EXT4_GET_BLOCKS_DIO_CONVERT_EXT);
+				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
 		if (ret <= 0) {
 			WARN_ON(ret <= 0);
 			printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3739,7 +3866,7 @@
 	int error = 0;
 
 	/* in-inode? */
-	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
 		struct ext4_iloc iloc;
 		int offset;	/* offset of xattr in inode */
 
@@ -3767,7 +3894,6 @@
 		__u64 start, __u64 len)
 {
 	ext4_lblk_t start_blk;
-	ext4_lblk_t len_blks;
 	int error = 0;
 
 	/* fallback to generic here if not in extents fmt */
@@ -3781,8 +3907,14 @@
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
 		error = ext4_xattr_fiemap(inode, fieinfo);
 	} else {
+		ext4_lblk_t len_blks;
+		__u64 last_blk;
+
 		start_blk = start >> inode->i_sb->s_blocksize_bits;
-		len_blks = len >> inode->i_sb->s_blocksize_bits;
+		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+		if (last_blk >= EXT_MAX_BLOCK)
+			last_blk = EXT_MAX_BLOCK-1;
+		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
 		/*
 		 * Walk the extent tree gathering extent information.
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 9630583..d0776e4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
+#include <linux/quotaops.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -35,9 +36,9 @@
  */
 static int ext4_release_file(struct inode *inode, struct file *filp)
 {
-	if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
 		ext4_alloc_da_blocks(inode);
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
+		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 	}
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
@@ -116,18 +117,16 @@
 		 * devices or filesystem images.
 		 */
 		memset(buf, 0, sizeof(buf));
-		path.mnt = mnt->mnt_parent;
-		path.dentry = mnt->mnt_mountpoint;
-		path_get(&path);
+		path.mnt = mnt;
+		path.dentry = mnt->mnt_root;
 		cp = d_path(&path, buf, sizeof(buf));
-		path_put(&path);
 		if (!IS_ERR(cp)) {
 			memcpy(sbi->s_es->s_last_mounted, cp,
 			       sizeof(sbi->s_es->s_last_mounted));
 			sb->s_dirt = 1;
 		}
 	}
-	return generic_file_open(inode, filp);
+	return dquot_file_open(inode, filp);
 }
 
 const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 98bd140..0d0c323 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -63,7 +63,7 @@
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;
 
-	ret = flush_aio_dio_completed_IO(inode);
+	ret = flush_completed_IO(inode);
 	if (ret < 0)
 		return ret;
 	
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f3624ea..361c0b9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,8 +76,7 @@
 	/* If checksum is bad mark all blocks and inodes use to prevent
 	 * allocation, essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-		ext4_error(sb, __func__, "Checksum bad for group %u",
-			   block_group);
+		ext4_error(sb, "Checksum bad for group %u", block_group);
 		ext4_free_blks_set(sb, gdp, 0);
 		ext4_free_inodes_set(sb, gdp, 0);
 		ext4_itable_unused_set(sb, gdp, 0);
@@ -111,8 +110,7 @@
 	bitmap_blk = ext4_inode_bitmap(sb, desc);
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
-		ext4_error(sb, __func__,
-			    "Cannot read inode bitmap - "
+		ext4_error(sb, "Cannot read inode bitmap - "
 			    "block_group = %u, inode_bitmap = %llu",
 			    block_group, bitmap_blk);
 		return NULL;
@@ -153,8 +151,7 @@
 	set_bitmap_uptodate(bh);
 	if (bh_submit_read(bh) < 0) {
 		put_bh(bh);
-		ext4_error(sb, __func__,
-			    "Cannot read inode bitmap - "
+		ext4_error(sb, "Cannot read inode bitmap - "
 			    "block_group = %u, inode_bitmap = %llu",
 			    block_group, bitmap_blk);
 		return NULL;
@@ -217,10 +214,10 @@
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	ext4_xattr_delete_inode(handle, inode);
-	vfs_dq_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	is_directory = S_ISDIR(inode->i_mode);
 
@@ -229,8 +226,7 @@
 
 	es = EXT4_SB(sb)->s_es;
 	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-		ext4_error(sb, "ext4_free_inode",
-			   "reserved or nonexistent inode %lu", ino);
+		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
 		goto error_return;
 	}
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -248,8 +244,7 @@
 	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
 					bit, bitmap_bh->b_data);
 	if (!cleared)
-		ext4_error(sb, "ext4_free_inode",
-			   "bit already cleared for inode %lu", ino);
+		ext4_error(sb, "bit already cleared for inode %lu", ino);
 	else {
 		gdp = ext4_get_group_desc(sb, block_group, &bh2);
 
@@ -736,8 +731,7 @@
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
 		ext4_unlock_group(sb, group);
-		ext4_error(sb, __func__,
-			   "reserved inode or inode > inodes count - "
+		ext4_error(sb, "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
 			   ino + group * EXT4_INODES_PER_GROUP(sb));
 		return 1;
@@ -904,7 +898,7 @@
 				BUFFER_TRACE(inode_bitmap_bh,
 					"call ext4_handle_dirty_metadata");
 				err = ext4_handle_dirty_metadata(handle,
-								 inode,
+								 NULL,
 							inode_bitmap_bh);
 				if (err)
 					goto fail;
@@ -1029,15 +1023,16 @@
 	inode->i_generation = sbi->s_next_generation++;
 	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state = EXT4_STATE_NEW;
+	ei->i_state_flags = 0;
+	ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
 	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
 
 	ret = inode;
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	dquot_initialize(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto fail_drop;
-	}
 
 	err = ext4_init_acl(handle, inode, dir);
 	if (err)
@@ -1074,10 +1069,10 @@
 	return ret;
 
 fail_free_drop:
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
 	unlock_new_inode(inode);
@@ -1098,8 +1093,7 @@
 
 	/* Error cases - e2fsck has already cleaned up for us */
 	if (ino > max_ino) {
-		ext4_warning(sb, __func__,
-			     "bad orphan ino %lu!  e2fsck was run?", ino);
+		ext4_warning(sb, "bad orphan ino %lu!  e2fsck was run?", ino);
 		goto error;
 	}
 
@@ -1107,8 +1101,7 @@
 	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
 	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
 	if (!bitmap_bh) {
-		ext4_warning(sb, __func__,
-			     "inode bitmap error for orphan %lu", ino);
+		ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
 		goto error;
 	}
 
@@ -1140,8 +1133,7 @@
 	err = PTR_ERR(inode);
 	inode = NULL;
 bad_orphan:
-	ext4_warning(sb, __func__,
-		     "bad orphan inode %lu!  e2fsck was run?", ino);
+	ext4_warning(sb, "bad orphan inode %lu!  e2fsck was run?", ino);
 	printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
 	       bit, (unsigned long long)bitmap_bh->b_blocknr,
 	       ext4_test_bit(bit, bitmap_bh->b_data));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e119524..986120f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
+#include <linux/kernel.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -170,6 +171,9 @@
 	handle_t *handle;
 	int err;
 
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages(&inode->i_data, 0);
@@ -194,7 +198,7 @@
 	inode->i_size = 0;
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
-		ext4_warning(inode->i_sb, __func__,
+		ext4_warning(inode->i_sb,
 			     "couldn't mark inode dirty (err %d)", err);
 		goto stop_handle;
 	}
@@ -212,7 +216,7 @@
 		if (err > 0)
 			err = ext4_journal_restart(handle, 3);
 		if (err != 0) {
-			ext4_warning(inode->i_sb, __func__,
+			ext4_warning(inode->i_sb,
 				     "couldn't extend journal (err %d)", err);
 		stop_handle:
 			ext4_journal_stop(handle);
@@ -323,8 +327,7 @@
 		offsets[n++] = i_block & (ptrs - 1);
 		final = ptrs;
 	} else {
-		ext4_warning(inode->i_sb, "ext4_block_to_path",
-			     "block %lu > max in inode %lu",
+		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
 			     i_block + direct_blocks +
 			     indirect_blocks + double_blocks, inode->i_ino);
 	}
@@ -344,7 +347,7 @@
 		if (blk &&
 		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
 						    blk, 1))) {
-			ext4_error(inode->i_sb, function,
+			__ext4_error(inode->i_sb, function,
 				   "invalid block reference %u "
 				   "in inode #%lu", blk, inode->i_ino);
 			return -EIO;
@@ -607,7 +610,14 @@
 		if (*err)
 			goto failed_out;
 
-		BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS);
+		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
+			EXT4_ERROR_INODE(inode,
+					 "current_block %llu + count %lu > %d!",
+					 current_block, count,
+					 EXT4_MAX_BLOCK_FILE_PHYS);
+			*err = -EIO;
+			goto failed_out;
+		}
 
 		target -= count;
 		/* allocate blocks for indirect blocks */
@@ -643,7 +653,14 @@
 		ar.flags = EXT4_MB_HINT_DATA;
 
 	current_block = ext4_mb_new_blocks(handle, &ar, err);
-	BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS);
+	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
+		EXT4_ERROR_INODE(inode,
+				 "current_block %llu + ar.len %d > %d!",
+				 current_block, ar.len,
+				 EXT4_MAX_BLOCK_FILE_PHYS);
+		*err = -EIO;
+		goto failed_out;
+	}
 
 	if (*err && (target == blks)) {
 		/*
@@ -1061,6 +1078,7 @@
 	int mdb_free = 0, allocated_meta_blocks = 0;
 
 	spin_lock(&ei->i_block_reservation_lock);
+	trace_ext4_da_update_reserve_space(inode, used);
 	if (unlikely(used > ei->i_reserved_data_blocks)) {
 		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
 			 "with only %d reserved data blocks\n",
@@ -1093,9 +1111,9 @@
 
 	/* Update quota subsystem */
 	if (quota_claim) {
-		vfs_dq_claim_block(inode, used);
+		dquot_claim_block(inode, used);
 		if (mdb_free)
-			vfs_dq_release_reservation_block(inode, mdb_free);
+			dquot_release_reservation_block(inode, mdb_free);
 	} else {
 		/*
 		 * We did fallocate with an offset that is already delayed
@@ -1106,8 +1124,8 @@
 		 * that
 		 */
 		if (allocated_meta_blocks)
-			vfs_dq_claim_block(inode, allocated_meta_blocks);
-		vfs_dq_release_reservation_block(inode, mdb_free + used);
+			dquot_claim_block(inode, allocated_meta_blocks);
+		dquot_release_reservation_block(inode, mdb_free + used);
 	}
 
 	/*
@@ -1124,7 +1142,7 @@
 				sector_t logical, sector_t phys, int len)
 {
 	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-		ext4_error(inode->i_sb, msg,
+		__ext4_error(inode->i_sb, msg,
 			   "inode #%lu logical block %llu mapped to %llu "
 			   "(size %d)", inode->i_ino,
 			   (unsigned long long) logical,
@@ -1306,7 +1324,7 @@
 			 * i_data's format changing.  Force the migrate
 			 * to fail by clearing migrate flags
 			 */
-			EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
+			ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 		}
 
 		/*
@@ -1534,6 +1552,8 @@
 	ext4_truncate(inode);
 }
 
+static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
@@ -1575,8 +1595,12 @@
 	}
 	*pagep = page;
 
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ext4_get_block);
+	if (ext4_should_dioread_nolock(inode))
+		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
+				fsdata, ext4_get_block_write);
+	else
+		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
+				fsdata, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1793,7 +1817,7 @@
 	new_i_size = pos + copied;
 	if (new_i_size > inode->i_size)
 		i_size_write(inode, pos+copied);
-	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 	if (new_i_size > EXT4_I(inode)->i_disksize) {
 		ext4_update_i_disksize(inode, new_i_size);
 		ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1836,6 +1860,7 @@
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned long md_needed, md_reserved;
+	int ret;
 
 	/*
 	 * recalculate the amount of metadata blocks to reserve
@@ -1846,6 +1871,7 @@
 	spin_lock(&ei->i_block_reservation_lock);
 	md_reserved = ei->i_reserved_meta_blocks;
 	md_needed = ext4_calc_metadata_amount(inode, lblock);
+	trace_ext4_da_reserve_space(inode, md_needed);
 	spin_unlock(&ei->i_block_reservation_lock);
 
 	/*
@@ -1853,11 +1879,12 @@
 	 * later. Real quota accounting is done at pages writeout
 	 * time.
 	 */
-	if (vfs_dq_reserve_block(inode, md_needed + 1))
-		return -EDQUOT;
+	ret = dquot_reserve_block(inode, md_needed + 1);
+	if (ret)
+		return ret;
 
 	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
-		vfs_dq_release_reservation_block(inode, md_needed + 1);
+		dquot_release_reservation_block(inode, md_needed + 1);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
 			yield();
 			goto repeat;
@@ -1914,7 +1941,7 @@
 
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-	vfs_dq_release_reservation_block(inode, to_free);
+	dquot_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,
@@ -2091,6 +2118,8 @@
 				} else if (buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
 
+				if (buffer_uninit(exbh))
+					set_buffer_uninit(bh);
 				cur_logical++;
 				pblock++;
 			} while ((bh = bh->b_this_page) != head);
@@ -2133,17 +2162,16 @@
 			break;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
-			index = page->index;
-			if (index > end)
+			if (page->index > end)
 				break;
-			index++;
-
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 			block_invalidatepage(page, 0);
 			ClearPageUptodate(page);
 			unlock_page(page);
 		}
+		index = pvec.pages[nr_pages - 1]->index + 1;
+		pagevec_release(&pvec);
 	}
 	return;
 }
@@ -2220,6 +2248,8 @@
 	 */
 	new.b_state = 0;
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
+	if (ext4_should_dioread_nolock(mpd->inode))
+		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
 	if (mpd->b_state & (1 << BH_Delay))
 		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
 
@@ -2630,11 +2660,14 @@
 		ret = err;
 
 	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
-	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 out:
 	return ret;
 }
 
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+
 /*
  * Note that we don't need to start a transaction unless we're journaling data
  * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2682,7 +2715,7 @@
 	int ret = 0;
 	loff_t size;
 	unsigned int len;
-	struct buffer_head *page_bufs;
+	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 
 	trace_ext4_writepage(inode, page);
@@ -2758,7 +2791,11 @@
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_writepage(page, noalloc_get_block_write, wbc);
-	else
+	else if (page_bufs && buffer_uninit(page_bufs)) {
+		ext4_set_bh_endio(page_bufs, inode);
+		ret = block_write_full_page_endio(page, noalloc_get_block_write,
+					    wbc, ext4_end_io_buffer_write);
+	} else
 		ret = block_write_full_page(page, noalloc_get_block_write,
 					    wbc);
 
@@ -3301,7 +3338,8 @@
 		filemap_write_and_wait(mapping);
 	}
 
-	if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+	if (EXT4_JOURNAL(inode) &&
+	    ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
 		/*
 		 * This is a REALLY heavyweight approach, but the use of
 		 * bmap on dirty files is expected to be extremely rare:
@@ -3320,7 +3358,7 @@
 		 * everything they get.
 		 */
 
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA;
+		ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
 		journal = EXT4_JOURNAL(inode);
 		jbd2_journal_lock_updates(journal);
 		err = jbd2_journal_flush(journal);
@@ -3345,11 +3383,45 @@
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
+static void ext4_free_io_end(ext4_io_end_t *io)
+{
+	BUG_ON(!io);
+	if (io->page)
+		put_page(io->page);
+	iput(io->inode);
+	kfree(io);
+}
+
+static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
+{
+	struct buffer_head *head, *bh;
+	unsigned int curr_off = 0;
+
+	if (!page_has_buffers(page))
+		return;
+	head = bh = page_buffers(page);
+	do {
+		if (offset <= curr_off && test_clear_buffer_uninit(bh)
+					&& bh->b_private) {
+			ext4_free_io_end(bh->b_private);
+			bh->b_private = NULL;
+			bh->b_end_io = NULL;
+		}
+		curr_off = curr_off + bh->b_size;
+		bh = bh->b_this_page;
+	} while (bh != head);
+}
+
 static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
 	/*
+	 * free any io_end structure allocated for buffers to be discarded
+	 */
+	if (ext4_should_dioread_nolock(page->mapping->host))
+		ext4_invalidatepage_free_endio(page, offset);
+	/*
 	 * If it's a full truncate we just forget about the pending dirtying
 	 */
 	if (offset == 0)
@@ -3420,7 +3492,14 @@
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	if (rw == READ && ext4_should_dioread_nolock(inode))
+		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+				 inode->i_sb->s_bdev, iov,
+				 offset, nr_segs,
+				 ext4_get_block, NULL);
+	else
+		ret = blockdev_direct_IO(rw, iocb, inode,
+				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL);
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3436,6 +3515,9 @@
 			 * but cannot extend i_size. Bail out and pretend
 			 * the write failed... */
 			ret = PTR_ERR(handle);
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+
 			goto out;
 		}
 		if (inode->i_nlink)
@@ -3463,75 +3545,63 @@
 	return ret;
 }
 
-static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
+static int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create)
 {
-	handle_t *handle = NULL;
+	handle_t *handle = ext4_journal_current_handle();
 	int ret = 0;
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 	int dio_credits;
+	int started = 0;
 
-	ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n",
+	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
 		   inode->i_ino, create);
 	/*
-	 * DIO VFS code passes create = 0 flag for write to
-	 * the middle of file. It does this to avoid block
-	 * allocation for holes, to prevent expose stale data
-	 * out when there is parallel buffered read (which does
-	 * not hold the i_mutex lock) while direct IO write has
-	 * not completed. DIO request on holes finally falls back
-	 * to buffered IO for this reason.
-	 *
-	 * For ext4 extent based file, since we support fallocate,
-	 * new allocated extent as uninitialized, for holes, we
-	 * could fallocate blocks for holes, thus parallel
-	 * buffered IO read will zero out the page when read on
-	 * a hole while parallel DIO write to the hole has not completed.
-	 *
-	 * when we come here, we know it's a direct IO write to
-	 * to the middle of file (<i_size)
-	 * so it's safe to override the create flag from VFS.
+	 * ext4_get_block in prepare for a DIO write or buffer write.
+	 * We allocate an uinitialized extent if blocks haven't been allocated.
+	 * The extent will be converted to initialized after IO complete.
 	 */
-	create = EXT4_GET_BLOCKS_DIO_CREATE_EXT;
+	create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
 
-	if (max_blocks > DIO_MAX_BLOCKS)
-		max_blocks = DIO_MAX_BLOCKS;
-	dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
-	handle = ext4_journal_start(inode, dio_credits);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+	if (!handle) {
+		if (max_blocks > DIO_MAX_BLOCKS)
+			max_blocks = DIO_MAX_BLOCKS;
+		dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
+		handle = ext4_journal_start(inode, dio_credits);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			goto out;
+		}
+		started = 1;
 	}
+
 	ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
 			      create);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
 	}
-	ext4_journal_stop(handle);
+	if (started)
+		ext4_journal_stop(handle);
 out:
 	return ret;
 }
 
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
-	BUG_ON(!io);
-	iput(io->inode);
-	kfree(io);
-}
-static void dump_aio_dio_list(struct inode * inode)
+static void dump_completed_IO(struct inode * inode)
 {
 #ifdef	EXT4_DEBUG
 	struct list_head *cur, *before, *after;
 	ext4_io_end_t *io, *io0, *io1;
+	unsigned long flags;
 
-	if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){
-		ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino);
+	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
+		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
 		return;
 	}
 
-	ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino);
-	list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){
+	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
 		cur = &io->list;
 		before = cur->prev;
 		io0 = container_of(before, ext4_io_end_t, list);
@@ -3541,32 +3611,31 @@
 		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
 			    io, inode->i_ino, io0, io1);
 	}
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
 #endif
 }
 
 /*
  * check a range of space and convert unwritten extents to written.
  */
-static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
+static int ext4_end_io_nolock(ext4_io_end_t *io)
 {
 	struct inode *inode = io->inode;
 	loff_t offset = io->offset;
-	size_t size = io->size;
+	ssize_t size = io->size;
 	int ret = 0;
 
-	ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p,"
+	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
 		   "list->prev 0x%p\n",
 	           io, inode->i_ino, io->list.next, io->list.prev);
 
 	if (list_empty(&io->list))
 		return ret;
 
-	if (io->flag != DIO_AIO_UNWRITTEN)
+	if (io->flag != EXT4_IO_UNWRITTEN)
 		return ret;
 
-	if (offset + size <= i_size_read(inode))
-		ret = ext4_convert_unwritten_extents(inode, offset, size);
-
+	ret = ext4_convert_unwritten_extents(inode, offset, size);
 	if (ret < 0) {
 		printk(KERN_EMERG "%s: failed to convert unwritten"
 			"extents to written extents, error is %d"
@@ -3579,50 +3648,64 @@
 	io->flag = 0;
 	return ret;
 }
+
 /*
  * work on completed aio dio IO, to convert unwritten extents to extents
  */
-static void ext4_end_aio_dio_work(struct work_struct *work)
+static void ext4_end_io_work(struct work_struct *work)
 {
-	ext4_io_end_t *io  = container_of(work, ext4_io_end_t, work);
-	struct inode *inode = io->inode;
-	int ret = 0;
+	ext4_io_end_t		*io = container_of(work, ext4_io_end_t, work);
+	struct inode		*inode = io->inode;
+	struct ext4_inode_info	*ei = EXT4_I(inode);
+	unsigned long		flags;
+	int			ret;
 
 	mutex_lock(&inode->i_mutex);
-	ret = ext4_end_aio_dio_nolock(io);
-	if (ret >= 0) {
-		if (!list_empty(&io->list))
-			list_del_init(&io->list);
-		ext4_free_io_end(io);
+	ret = ext4_end_io_nolock(io);
+	if (ret < 0) {
+		mutex_unlock(&inode->i_mutex);
+		return;
 	}
+
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	if (!list_empty(&io->list))
+		list_del_init(&io->list);
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	mutex_unlock(&inode->i_mutex);
+	ext4_free_io_end(io);
 }
+
 /*
  * This function is called from ext4_sync_file().
  *
- * When AIO DIO IO is completed, the work to convert unwritten
- * extents to written is queued on workqueue but may not get immediately
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on workqueue but may not get immediately
  * scheduled. When fsync is called, we need to ensure the
  * conversion is complete before fsync returns.
- * The inode keeps track of a list of completed AIO from DIO path
- * that might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents to written.
+ * The inode keeps track of a list of pending/completed IO that
+ * might needs to do the conversion. This function walks through
+ * the list and convert the related unwritten extents for completed IO
+ * to written.
+ * The function return the number of pending IOs on success.
  */
-int flush_aio_dio_completed_IO(struct inode *inode)
+int flush_completed_IO(struct inode *inode)
 {
 	ext4_io_end_t *io;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long flags;
 	int ret = 0;
 	int ret2 = 0;
 
-	if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list))
+	if (list_empty(&ei->i_completed_io_list))
 		return ret;
 
-	dump_aio_dio_list(inode);
-	while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){
-		io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next,
+	dump_completed_IO(inode);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	while (!list_empty(&ei->i_completed_io_list)){
+		io = list_entry(ei->i_completed_io_list.next,
 				ext4_io_end_t, list);
 		/*
-		 * Calling ext4_end_aio_dio_nolock() to convert completed
+		 * Calling ext4_end_io_nolock() to convert completed
 		 * IO to written.
 		 *
 		 * When ext4_sync_file() is called, run_queue() may already
@@ -3635,20 +3718,23 @@
 		 * avoid double converting from both fsync and background work
 		 * queue work.
 		 */
-		ret = ext4_end_aio_dio_nolock(io);
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+		ret = ext4_end_io_nolock(io);
+		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 		if (ret < 0)
 			ret2 = ret;
 		else
 			list_del_init(&io->list);
 	}
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	return (ret2 < 0) ? ret2 : 0;
 }
 
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
+static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 {
 	ext4_io_end_t *io = NULL;
 
-	io = kmalloc(sizeof(*io), GFP_NOFS);
+	io = kmalloc(sizeof(*io), flags);
 
 	if (io) {
 		igrab(inode);
@@ -3656,8 +3742,8 @@
 		io->flag = 0;
 		io->offset = 0;
 		io->size = 0;
-		io->error = 0;
-		INIT_WORK(&io->work, ext4_end_aio_dio_work);
+		io->page = NULL;
+		INIT_WORK(&io->work, ext4_end_io_work);
 		INIT_LIST_HEAD(&io->list);
 	}
 
@@ -3669,6 +3755,8 @@
 {
         ext4_io_end_t *io_end = iocb->private;
 	struct workqueue_struct *wq;
+	unsigned long flags;
+	struct ext4_inode_info *ei;
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
@@ -3680,7 +3768,7 @@
 		  size);
 
 	/* if not aio dio with unwritten extents, just free io and return */
-	if (io_end->flag != DIO_AIO_UNWRITTEN){
+	if (io_end->flag != EXT4_IO_UNWRITTEN){
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
 		return;
@@ -3688,16 +3776,85 @@
 
 	io_end->offset = offset;
 	io_end->size = size;
+	io_end->flag = EXT4_IO_UNWRITTEN;
 	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
 	/* queue the work to convert unwritten extents to written */
 	queue_work(wq, &io_end->work);
 
 	/* Add the io_end to per-inode completed aio dio list*/
-	list_add_tail(&io_end->list,
-		 &EXT4_I(io_end->inode)->i_aio_dio_complete_list);
+	ei = EXT4_I(io_end->inode);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	list_add_tail(&io_end->list, &ei->i_completed_io_list);
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
 	iocb->private = NULL;
 }
+
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
+{
+	ext4_io_end_t *io_end = bh->b_private;
+	struct workqueue_struct *wq;
+	struct inode *inode;
+	unsigned long flags;
+
+	if (!test_clear_buffer_uninit(bh) || !io_end)
+		goto out;
+
+	if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
+		printk("sb umounted, discard end_io request for inode %lu\n",
+			io_end->inode->i_ino);
+		ext4_free_io_end(io_end);
+		goto out;
+	}
+
+	io_end->flag = EXT4_IO_UNWRITTEN;
+	inode = io_end->inode;
+
+	/* Add the io_end to per-inode completed io list*/
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+
+	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
+	/* queue the work to convert unwritten extents to written */
+	queue_work(wq, &io_end->work);
+out:
+	bh->b_private = NULL;
+	bh->b_end_io = NULL;
+	clear_buffer_uninit(bh);
+	end_buffer_async_write(bh, uptodate);
+}
+
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
+{
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
+	loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
+	size_t size = bh->b_size;
+
+retry:
+	io_end = ext4_init_io_end(inode, GFP_ATOMIC);
+	if (!io_end) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "%s: allocation fail\n", __func__);
+		schedule();
+		goto retry;
+	}
+	io_end->offset = offset;
+	io_end->size = size;
+	/*
+	 * We need to hold a reference to the page to make sure it
+	 * doesn't get evicted before ext4_end_io_work() has a chance
+	 * to convert the extent from written to unwritten.
+	 */
+	io_end->page = page;
+	get_page(io_end->page);
+
+	bh->b_private = io_end;
+	bh->b_end_io = ext4_end_io_buffer_write;
+	return 0;
+}
+
 /*
  * For ext4 extent files, ext4 will do direct-io write to holes,
  * preallocated extents, and those write extend the file, no need to
@@ -3751,7 +3908,7 @@
 		iocb->private = NULL;
 		EXT4_I(inode)->cur_aio_dio = NULL;
 		if (!is_sync_kiocb(iocb)) {
-			iocb->private = ext4_init_io_end(inode);
+			iocb->private = ext4_init_io_end(inode, GFP_NOFS);
 			if (!iocb->private)
 				return -ENOMEM;
 			/*
@@ -3767,7 +3924,7 @@
 		ret = blockdev_direct_IO(rw, iocb, inode,
 					 inode->i_sb->s_bdev, iov,
 					 offset, nr_segs,
-					 ext4_get_block_dio_write,
+					 ext4_get_block_write,
 					 ext4_end_io_dio);
 		if (iocb->private)
 			EXT4_I(inode)->cur_aio_dio = NULL;
@@ -3788,8 +3945,8 @@
 		if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
 			ext4_free_io_end(iocb->private);
 			iocb->private = NULL;
-		} else if (ret > 0 && (EXT4_I(inode)->i_state &
-				       EXT4_STATE_DIO_UNWRITTEN)) {
+		} else if (ret > 0 && ext4_test_inode_state(inode,
+						EXT4_STATE_DIO_UNWRITTEN)) {
 			int err;
 			/*
 			 * for non AIO case, since the IO is already
@@ -3799,7 +3956,7 @@
 							     offset, ret);
 			if (err < 0)
 				ret = err;
-			EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN;
+			ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		}
 		return ret;
 	}
@@ -4130,18 +4287,27 @@
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
  */
-static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
-			      struct buffer_head *bh,
-			      ext4_fsblk_t block_to_free,
-			      unsigned long count, __le32 *first,
-			      __le32 *last)
+static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
+			     struct buffer_head *bh,
+			     ext4_fsblk_t block_to_free,
+			     unsigned long count, __le32 *first,
+			     __le32 *last)
 {
 	__le32 *p;
-	int	flags = EXT4_FREE_BLOCKS_FORGET;
+	int	flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
 
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
+				   count)) {
+		ext4_error(inode->i_sb, "inode #%lu: "
+			   "attempt to clear blocks %llu len %lu, invalid",
+			   inode->i_ino, (unsigned long long) block_to_free,
+			   count);
+		return 1;
+	}
+
 	if (try_to_extend_transaction(handle, inode)) {
 		if (bh) {
 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4160,6 +4326,7 @@
 		*p = 0;
 
 	ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+	return 0;
 }
 
 /**
@@ -4215,9 +4382,10 @@
 			} else if (nr == block_to_free + count) {
 				count++;
 			} else {
-				ext4_clear_blocks(handle, inode, this_bh,
-						  block_to_free,
-						  count, block_to_free_p, p);
+				if (ext4_clear_blocks(handle, inode, this_bh,
+						      block_to_free, count,
+						      block_to_free_p, p))
+					break;
 				block_to_free = nr;
 				block_to_free_p = p;
 				count = 1;
@@ -4241,7 +4409,7 @@
 		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
 			ext4_handle_dirty_metadata(handle, inode, this_bh);
 		else
-			ext4_error(inode->i_sb, __func__,
+			ext4_error(inode->i_sb,
 				   "circular indirect block detected, "
 				   "inode=%lu, block=%llu",
 				   inode->i_ino,
@@ -4281,6 +4449,16 @@
 			if (!nr)
 				continue;		/* A hole */
 
+			if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
+						   nr, 1)) {
+				ext4_error(inode->i_sb,
+					   "indirect mapped block in inode "
+					   "#%lu invalid (level %d, blk #%lu)",
+					   inode->i_ino, depth,
+					   (unsigned long) nr);
+				break;
+			}
+
 			/* Go read the buffer for the next level down */
 			bh = sb_bread(inode->i_sb, nr);
 
@@ -4289,7 +4467,7 @@
 			 * (should be rare).
 			 */
 			if (!bh) {
-				ext4_error(inode->i_sb, "ext4_free_branches",
+				ext4_error(inode->i_sb,
 					   "Read failure, inode=%lu, block=%llu",
 					   inode->i_ino, nr);
 				continue;
@@ -4433,8 +4611,10 @@
 	if (!ext4_can_truncate(inode))
 		return;
 
+	EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
-		ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
+		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
 	if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 		ext4_ext_truncate(inode);
@@ -4604,9 +4784,8 @@
 
 	bh = sb_getblk(sb, block);
 	if (!bh) {
-		ext4_error(sb, "ext4_get_inode_loc", "unable to read "
-			   "inode block - inode=%lu, block=%llu",
-			   inode->i_ino, block);
+		ext4_error(sb, "unable to read inode block - "
+			   "inode=%lu, block=%llu", inode->i_ino, block);
 		return -EIO;
 	}
 	if (!buffer_uptodate(bh)) {
@@ -4704,9 +4883,8 @@
 		submit_bh(READ_META, bh);
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			ext4_error(sb, __func__,
-				   "unable to read inode block - inode=%lu, "
-				   "block=%llu", inode->i_ino, block);
+			ext4_error(sb, "unable to read inode block - inode=%lu,"
+				   " block=%llu", inode->i_ino, block);
 			brelse(bh);
 			return -EIO;
 		}
@@ -4720,7 +4898,7 @@
 {
 	/* We have all inode data except xattrs in memory here. */
 	return __ext4_get_inode_loc(inode, iloc,
-		!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR));
+		!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
 }
 
 void ext4_set_inode_flags(struct inode *inode)
@@ -4814,7 +4992,7 @@
 	}
 	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
 
-	ei->i_state = 0;
+	ei->i_state_flags = 0;
 	ei->i_dir_start_lookup = 0;
 	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
 	/* We now have enough fields to check if the inode was active or not.
@@ -4897,7 +5075,7 @@
 					EXT4_GOOD_OLD_INODE_SIZE +
 					ei->i_extra_isize;
 			if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
-				ei->i_state |= EXT4_STATE_XATTR;
+				ext4_set_inode_state(inode, EXT4_STATE_XATTR);
 		}
 	} else
 		ei->i_extra_isize = 0;
@@ -4917,8 +5095,7 @@
 	ret = 0;
 	if (ei->i_file_acl &&
 	    !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-		ext4_error(sb, __func__,
-			   "bad extended attribute block %llu in inode #%lu",
+		ext4_error(sb, "bad extended attribute block %llu inode #%lu",
 			   ei->i_file_acl, inode->i_ino);
 		ret = -EIO;
 		goto bad_inode;
@@ -4964,8 +5141,7 @@
 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
 	} else {
 		ret = -EIO;
-		ext4_error(inode->i_sb, __func__,
-			   "bogus i_mode (%o) for inode=%lu",
+		ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
 			   inode->i_mode, inode->i_ino);
 		goto bad_inode;
 	}
@@ -5037,7 +5213,7 @@
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
-	if (ei->i_state & EXT4_STATE_NEW)
+	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
 		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
 
 	ext4_get_inode_flags(ei);
@@ -5101,7 +5277,7 @@
 					EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
 			sb->s_dirt = 1;
 			ext4_handle_sync(handle);
-			err = ext4_handle_dirty_metadata(handle, inode,
+			err = ext4_handle_dirty_metadata(handle, NULL,
 					EXT4_SB(sb)->s_sbh);
 		}
 	}
@@ -5130,10 +5306,10 @@
 	}
 
 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-	rc = ext4_handle_dirty_metadata(handle, inode, bh);
+	rc = ext4_handle_dirty_metadata(handle, NULL, bh);
 	if (!err)
 		err = rc;
-	ei->i_state &= ~EXT4_STATE_NEW;
+	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
 
 	ext4_update_inode_fsync_trans(handle, inode, 0);
 out_brelse:
@@ -5177,7 +5353,7 @@
  * `stuff()' is running, and the new i_size will be lost.  Plus the inode
  * will no longer be on the superblock's dirty inode list.
  */
-int ext4_write_inode(struct inode *inode, int wait)
+int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err;
 
@@ -5191,7 +5367,7 @@
 			return -EIO;
 		}
 
-		if (!wait)
+		if (wbc->sync_mode != WB_SYNC_ALL)
 			return 0;
 
 		err = ext4_force_commit(inode->i_sb);
@@ -5201,13 +5377,11 @@
 		err = ext4_get_inode_loc(inode, &iloc);
 		if (err)
 			return err;
-		if (wait)
+		if (wbc->sync_mode == WB_SYNC_ALL)
 			sync_dirty_buffer(iloc.bh);
 		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-			ext4_error(inode->i_sb, __func__,
-				   "IO error syncing inode, "
-				   "inode=%lu, block=%llu",
-				   inode->i_ino,
+			ext4_error(inode->i_sb, "IO error syncing inode, "
+				   "inode=%lu, block=%llu", inode->i_ino,
 				   (unsigned long long)iloc.bh->b_blocknr);
 			err = -EIO;
 		}
@@ -5249,6 +5423,8 @@
 	if (error)
 		return error;
 
+	if (ia_valid & ATTR_SIZE)
+		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		handle_t *handle;
@@ -5261,7 +5437,7 @@
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-		error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		error = dquot_transfer(inode, attr);
 		if (error) {
 			ext4_journal_stop(handle);
 			return error;
@@ -5288,7 +5464,9 @@
 	}
 
 	if (S_ISREG(inode->i_mode) &&
-	    attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+	    attr->ia_valid & ATTR_SIZE &&
+	    (attr->ia_size < inode->i_size ||
+	     (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
 		handle_t *handle;
 
 		handle = ext4_journal_start(inode, 3);
@@ -5319,6 +5497,9 @@
 				goto err_out;
 			}
 		}
+		/* ext4_truncate will clear the flag */
+		if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+			ext4_truncate(inode);
 	}
 
 	rc = inode_setattr(inode, attr);
@@ -5557,8 +5738,8 @@
 	entry = IFIRST(header);
 
 	/* No extended attributes present */
-	if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) ||
-		header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
+	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
+	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
 		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
 			new_extra_isize);
 		EXT4_I(inode)->i_extra_isize = new_extra_isize;
@@ -5602,7 +5783,7 @@
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
-	    !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
+	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
 		/*
 		 * We need extra buffer credits since we may write into EA block
 		 * with this same handle. If journal_extend fails, then it will
@@ -5616,10 +5797,11 @@
 						      sbi->s_want_extra_isize,
 						      iloc, handle);
 			if (ret) {
-				EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
+				ext4_set_inode_state(inode,
+						     EXT4_STATE_NO_EXPAND);
 				if (mnt_count !=
 					le16_to_cpu(sbi->s_es->s_mnt_count)) {
-					ext4_warning(inode->i_sb, __func__,
+					ext4_warning(inode->i_sb,
 					"Unable to expand inode %lu. Delete"
 					" some EAs or run e2fsck.",
 					inode->i_ino);
@@ -5641,7 +5823,7 @@
  * i_size has been changed by generic_commit_write() and we thus need
  * to include the updated inode in the current transaction.
  *
- * Also, vfs_dq_alloc_block() will always dirty the inode when blocks
+ * Also, dquot_alloc_block() will always dirty the inode when blocks
  * are allocated to the file.
  *
  * If the inode is marked synchronous, we don't honour that here - doing
@@ -5683,7 +5865,7 @@
 			err = jbd2_journal_get_write_access(handle, iloc.bh);
 			if (!err)
 				err = ext4_handle_dirty_metadata(handle,
-								 inode,
+								 NULL,
 								 iloc.bh);
 			brelse(iloc.bh);
 		}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b63d193..016d024 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -92,6 +92,15 @@
 			flags &= ~EXT4_EXTENTS_FL;
 		}
 
+		if (flags & EXT4_EOFBLOCKS_FL) {
+			/* we don't support adding EOFBLOCKS flag */
+			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+				err = -EOPNOTSUPP;
+				goto flags_out;
+			}
+		} else if (oldflags & EXT4_EOFBLOCKS_FL)
+			ext4_truncate(inode);
+
 		handle = ext4_journal_start(inode, 1);
 		if (IS_ERR(handle)) {
 			err = PTR_ERR(handle);
@@ -249,7 +258,8 @@
 		if (me.moved_len > 0)
 			file_remove_suid(donor_filp);
 
-		if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
+		if (copy_to_user((struct move_extent __user *)arg, 
+				 &me, sizeof(me)))
 			err = -EFAULT;
 mext_out:
 		fput(donor_filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d34afad..506713a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -441,10 +441,9 @@
 	for (i = 0; i < count; i++) {
 		if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
 			ext4_fsblk_t blocknr;
-			blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+
+			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 			blocknr += first + i;
-			blocknr +=
-			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 				   __func__, "double-free of inode"
 				   " %lu's block %llu(bit %u in group %u)",
@@ -1255,10 +1254,9 @@
 
 		if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
 			ext4_fsblk_t blocknr;
-			blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+
+			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
 			blocknr += block;
-			blocknr +=
-			    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 				   __func__, "double-free of inode"
 				   " %lu's block %llu(bit %u in group %u)",
@@ -1631,7 +1629,6 @@
 	int max;
 	int err;
 	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-	struct ext4_super_block *es = sbi->s_es;
 	struct ext4_free_extent ex;
 
 	if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@@ -1648,8 +1645,8 @@
 	if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
 		ext4_fsblk_t start;
 
-		start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) +
-			ex.fe_start + le32_to_cpu(es->s_first_data_block);
+		start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
+			ex.fe_start;
 		/* use do_div to get remainder (would be 64-bit modulo) */
 		if (do_div(start, sbi->s_stripe) == 0) {
 			ac->ac_found++;
@@ -1803,8 +1800,8 @@
 	BUG_ON(sbi->s_stripe == 0);
 
 	/* find first stripe-aligned block in group */
-	first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb)
-		+ le32_to_cpu(sbi->s_es->s_first_data_block);
+	first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
+
 	a = first_group_block + sbi->s_stripe - 1;
 	do_div(a, sbi->s_stripe);
 	i = (a * sbi->s_stripe) - first_group_block;
@@ -2256,7 +2253,7 @@
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
 	init_rwsem(&meta_group_info[i]->alloc_sem);
-	meta_group_info[i]->bb_free_root.rb_node = NULL;
+	meta_group_info[i]->bb_free_root = RB_ROOT;
 
 #ifdef DOUBLE_CHECK
 	{
@@ -2560,12 +2557,9 @@
 		ext4_unlock_group(sb, entry->group);
 		if (test_opt(sb, DISCARD)) {
 			ext4_fsblk_t discard_block;
-			struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
-			discard_block = (ext4_fsblk_t)entry->group *
-						EXT4_BLOCKS_PER_GROUP(sb)
-					+ entry->start_blk
-					+ le32_to_cpu(es->s_first_data_block);
+			discard_block = entry->start_blk +
+				ext4_group_first_block_no(sb, entry->group);
 			trace_ext4_discard_blocks(sb,
 					(unsigned long long)discard_block,
 					entry->count);
@@ -2703,14 +2697,11 @@
 	if (err)
 		goto out_err;
 
-	block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-		+ ac->ac_b_ex.fe_start
-		+ le32_to_cpu(es->s_first_data_block);
+	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 
 	len = ac->ac_b_ex.fe_len;
 	if (!ext4_data_block_valid(sbi, block, len)) {
-		ext4_error(sb, __func__,
-			   "Allocating blocks %llu-%llu which overlap "
+		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
 			   "fs metadata\n", block, block+len);
 		/* File system mounted not to panic on error
 		 * Fix the bitmap and repeat the block allocation
@@ -3161,9 +3152,7 @@
 		/* The max size of hash table is PREALLOC_TB_SIZE */
 		order = PREALLOC_TB_SIZE - 1;
 
-	goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
-		     ac->ac_g_ex.fe_start +
-		     le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+	goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
 	/*
 	 * search for the prealloc space that is having
 	 * minimal distance from the goal block.
@@ -3526,8 +3515,7 @@
 		if (bit >= end)
 			break;
 		next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-		start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
-				le32_to_cpu(sbi->s_es->s_first_data_block);
+		start = ext4_group_first_block_no(sb, group) + bit;
 		mb_debug(1, "    free preallocated %u/%u in group %u\n",
 				(unsigned) start, (unsigned) next - bit,
 				(unsigned) group);
@@ -3623,15 +3611,13 @@
 
 	bitmap_bh = ext4_read_block_bitmap(sb, group);
 	if (bitmap_bh == NULL) {
-		ext4_error(sb, __func__, "Error in reading block "
-				"bitmap for %u", group);
+		ext4_error(sb, "Error reading block bitmap for %u", group);
 		return 0;
 	}
 
 	err = ext4_mb_load_buddy(sb, group, &e4b);
 	if (err) {
-		ext4_error(sb, __func__, "Error in loading buddy "
-				"information for %u", group);
+		ext4_error(sb, "Error loading buddy information for %u", group);
 		put_bh(bitmap_bh);
 		return 0;
 	}
@@ -3804,15 +3790,15 @@
 
 		err = ext4_mb_load_buddy(sb, group, &e4b);
 		if (err) {
-			ext4_error(sb, __func__, "Error in loading buddy "
-					"information for %u", group);
+			ext4_error(sb, "Error loading buddy information for %u",
+					group);
 			continue;
 		}
 
 		bitmap_bh = ext4_read_block_bitmap(sb, group);
 		if (bitmap_bh == NULL) {
-			ext4_error(sb, __func__, "Error in reading block "
-					"bitmap for %u", group);
+			ext4_error(sb, "Error reading block bitmap for %u",
+					group);
 			ext4_mb_release_desc(&e4b);
 			continue;
 		}
@@ -3938,7 +3924,7 @@
 
 	/* don't use group allocation for large files */
 	size = max(size, isize);
-	if (size >= sbi->s_mb_stream_request) {
+	if (size > sbi->s_mb_stream_request) {
 		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
 		return;
 	}
@@ -4077,8 +4063,8 @@
 
 		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
 		if (ext4_mb_load_buddy(sb, group, &e4b)) {
-			ext4_error(sb, __func__, "Error in loading buddy "
-					"information for %u", group);
+			ext4_error(sb, "Error loading buddy information for %u",
+					group);
 			continue;
 		}
 		ext4_lock_group(sb, group);
@@ -4254,7 +4240,7 @@
 			return 0;
 		}
 		reserv_blks = ar->len;
-		while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) {
+		while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
 			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
 			ar->len--;
 		}
@@ -4331,7 +4317,7 @@
 	kmem_cache_free(ext4_ac_cachep, ac);
 out1:
 	if (inquota && ar->len < inquota)
-		vfs_dq_free_block(ar->inode, inquota - ar->len);
+		dquot_free_block(ar->inode, inquota - ar->len);
 out3:
 	if (!ar->len) {
 		if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4476,10 +4462,10 @@
 
 	sbi = EXT4_SB(sb);
 	es = EXT4_SB(sb)->s_es;
-	if (!ext4_data_block_valid(sbi, block, count)) {
-		ext4_error(sb, __func__,
-			    "Freeing blocks not in datazone - "
-			    "block = %llu, count = %lu", block, count);
+	if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+	    !ext4_data_block_valid(sbi, block, count)) {
+		ext4_error(sb, "Freeing blocks not in datazone - "
+			   "block = %llu, count = %lu", block, count);
 		goto error_return;
 	}
 
@@ -4547,8 +4533,7 @@
 	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
 		      EXT4_SB(sb)->s_itb_per_group)) {
 
-		ext4_error(sb, __func__,
-			   "Freeing blocks in system zone - "
+		ext4_error(sb, "Freeing blocks in system zone - "
 			   "Block = %llu, count = %lu", block, count);
 		/* err = 0. ext4_std_error should be a no op */
 		goto error_return;
@@ -4646,7 +4631,7 @@
 	sb->s_dirt = 1;
 error_return:
 	if (freed)
-		vfs_dq_free_block(inode, freed);
+		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	if (ac)
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 436521c..b619322 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -220,16 +220,9 @@
 #define EXT4_MB_BITMAP(e4b)	((e4b)->bd_bitmap)
 #define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
 
-#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
-
 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
 					struct ext4_free_extent *fex)
 {
-	ext4_fsblk_t block;
-
-	block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-			+ fex->fe_start
-			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-	return block;
+	return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
 }
 #endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8141581..8b87bd0 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -365,12 +365,12 @@
 	 * happened after we started the migrate. We need to
 	 * fail the migrate
 	 */
-	if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) {
+	if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
 		retval = -EAGAIN;
 		up_write(&EXT4_I(inode)->i_data_sem);
 		goto err_out;
 	} else
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
+		ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 	/*
 	 * We have the extent map build with the tmp inode.
 	 * Now copy the i_data across
@@ -503,14 +503,10 @@
 	}
 	i_size_write(tmp_inode, i_size_read(inode));
 	/*
-	 * We don't want the inode to be reclaimed
-	 * if we got interrupted in between. We have
-	 * this tmp inode carrying reference to the
-	 * data blocks of the original file. We set
-	 * the i_nlink to zero at the last stage after
-	 * switching the original file to extent format
+	 * Set the i_nlink to zero so it will be deleted later
+	 * when we drop inode reference.
 	 */
-	tmp_inode->i_nlink = 1;
+	tmp_inode->i_nlink = 0;
 
 	ext4_ext_tree_init(handle, tmp_inode);
 	ext4_orphan_add(handle, tmp_inode);
@@ -533,10 +529,20 @@
 	 * allocation.
 	 */
 	down_read((&EXT4_I(inode)->i_data_sem));
-	EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;
+	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 	up_read((&EXT4_I(inode)->i_data_sem));
 
 	handle = ext4_journal_start(inode, 1);
+	if (IS_ERR(handle)) {
+		/*
+		 * It is impossible to update on-disk structures without
+		 * a handle, so just rollback in-core changes and live other
+		 * work to orphan_list_cleanup()
+		 */
+		ext4_orphan_del(NULL, tmp_inode);
+		retval = PTR_ERR(handle);
+		goto out;
+	}
 
 	ei = EXT4_I(inode);
 	i_data = ei->i_data;
@@ -618,15 +624,8 @@
 
 	/* Reset the extent details */
 	ext4_ext_tree_init(handle, tmp_inode);
-
-	/*
-	 * Set the i_nlink to zero so that
-	 * generic_drop_inode really deletes the
-	 * inode
-	 */
-	tmp_inode->i_nlink = 0;
-
 	ext4_journal_stop(handle);
+out:
 	unlock_new_inode(tmp_inode);
 	iput(tmp_inode);
 
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 82c415b..aa5fe28 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -152,12 +152,12 @@
 	int ret = 0;
 
 	if (inode1 == NULL) {
-		ext4_error(inode2->i_sb, function,
+		__ext4_error(inode2->i_sb, function,
 			"Both inodes should not be NULL: "
 			"inode1 NULL inode2 %lu", inode2->i_ino);
 		ret = -EIO;
 	} else if (inode2 == NULL) {
-		ext4_error(inode1->i_sb, function,
+		__ext4_error(inode1->i_sb, function,
 			"Both inodes should not be NULL: "
 			"inode1 %lu inode2 NULL", inode1->i_ino);
 		ret = -EIO;
@@ -252,6 +252,7 @@
 		}
 
 		o_start->ee_len = start_ext->ee_len;
+		eblock = le32_to_cpu(start_ext->ee_block);
 		new_flag = 1;
 
 	} else if (start_ext->ee_len && new_ext->ee_len &&
@@ -262,6 +263,7 @@
 		 * orig  |------------------------------|
 		 */
 		o_start->ee_len = start_ext->ee_len;
+		eblock = le32_to_cpu(start_ext->ee_block);
 		new_flag = 1;
 
 	} else if (!start_ext->ee_len && new_ext->ee_len &&
@@ -475,7 +477,6 @@
 	struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
 	struct ext4_extent new_ext, start_ext, end_ext;
 	ext4_lblk_t new_ext_end;
-	ext4_fsblk_t new_phys_end;
 	int oext_alen, new_ext_alen, end_ext_alen;
 	int depth = ext_depth(orig_inode);
 	int ret;
@@ -489,7 +490,6 @@
 	new_ext.ee_len = dext->ee_len;
 	new_ext_alen = ext4_ext_get_actual_len(&new_ext);
 	new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-	new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
 
 	/*
 	 * Case: original extent is first
@@ -502,6 +502,7 @@
 		le32_to_cpu(oext->ee_block) + oext_alen) {
 		start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
 					       le32_to_cpu(oext->ee_block));
+		start_ext.ee_block = oext->ee_block;
 		copy_extent_status(oext, &start_ext);
 	} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
 		prev_ext = oext - 1;
@@ -515,6 +516,7 @@
 			start_ext.ee_len = cpu_to_le16(
 				ext4_ext_get_actual_len(prev_ext) +
 				new_ext_alen);
+			start_ext.ee_block = oext->ee_block;
 			copy_extent_status(prev_ext, &start_ext);
 			new_ext.ee_len = 0;
 		}
@@ -526,7 +528,7 @@
 	 * new_ext       |-------|
 	 */
 	if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-		ext4_error(orig_inode->i_sb, __func__,
+		ext4_error(orig_inode->i_sb,
 			"new_ext_end(%u) should be less than or equal to "
 			"oext->ee_block(%u) + oext_alen(%d) - 1",
 			new_ext_end, le32_to_cpu(oext->ee_block),
@@ -689,12 +691,12 @@
 	while (1) {
 		/* The extent for donor must be found. */
 		if (!dext) {
-			ext4_error(donor_inode->i_sb, __func__,
+			ext4_error(donor_inode->i_sb,
 				   "The extent for donor must be found");
 			*err = -EIO;
 			goto out;
 		} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-			ext4_error(donor_inode->i_sb, __func__,
+			ext4_error(donor_inode->i_sb,
 				"Donor offset(%u) and the first block of donor "
 				"extent(%u) should be equal",
 				donor_off,
@@ -928,7 +930,7 @@
 }
 
 /**
- * mext_check_argumants - Check whether move extent can be done
+ * mext_check_arguments - Check whether move extent can be done
  *
  * @orig_inode:		original inode
  * @donor_inode:	donor inode
@@ -949,14 +951,6 @@
 	unsigned int blkbits = orig_inode->i_blkbits;
 	unsigned int blocksize = 1 << blkbits;
 
-	/* Regular file check */
-	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
-		ext4_debug("ext4 move extent: The argument files should be "
-			"regular file [ino:orig %lu, donor %lu]\n",
-			orig_inode->i_ino, donor_inode->i_ino);
-		return -EINVAL;
-	}
-
 	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
 		ext4_debug("ext4 move extent: suid or sgid is set"
 			   " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1204,6 +1198,14 @@
 		return -EINVAL;
 	}
 
+	/* Regular file check */
+	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
+		ext4_debug("ext4 move extent: The argument files should be "
+			"regular file [ino:orig %lu, donor %lu]\n",
+			orig_inode->i_ino, donor_inode->i_ino);
+		return -EINVAL;
+	}
+
 	/* Protect orig and donor inodes against a truncate */
 	ret1 = mext_inode_double_lock(orig_inode, donor_inode);
 	if (ret1 < 0)
@@ -1351,7 +1353,7 @@
 			if (ret1 < 0)
 				break;
 			if (*moved_len > len) {
-				ext4_error(orig_inode->i_sb, __func__,
+				ext4_error(orig_inode->i_sb,
 					"We replaced blocks too much! "
 					"sum of replaced: %llu requested: %llu",
 					*moved_len, len);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 17a17e1..0c070fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -383,8 +383,7 @@
 	if (root->info.hash_version != DX_HASH_TEA &&
 	    root->info.hash_version != DX_HASH_HALF_MD4 &&
 	    root->info.hash_version != DX_HASH_LEGACY) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unrecognised inode hash code %d",
+		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
 			     root->info.hash_version);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -399,8 +398,7 @@
 	hash = hinfo->hash;
 
 	if (root->info.unused_flags & 1) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unimplemented inode hash flags: %#06x",
+		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
 			     root->info.unused_flags);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -408,8 +406,7 @@
 	}
 
 	if ((indirect = root->info.indirect_levels) > 1) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unimplemented inode hash depth: %#06x",
+		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
 			     root->info.indirect_levels);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -421,8 +418,7 @@
 
 	if (dx_get_limit(entries) != dx_root_limit(dir,
 						   root->info.info_length)) {
-		ext4_warning(dir->i_sb, __func__,
-			     "dx entry: limit != root limit");
+		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
 		goto fail;
@@ -433,7 +429,7 @@
 	{
 		count = dx_get_count(entries);
 		if (!count || count > dx_get_limit(entries)) {
-			ext4_warning(dir->i_sb, __func__,
+			ext4_warning(dir->i_sb,
 				     "dx entry: no count or count > limit");
 			brelse(bh);
 			*err = ERR_BAD_DX_DIR;
@@ -478,7 +474,7 @@
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
 		if (dx_get_limit(entries) != dx_node_limit (dir)) {
-			ext4_warning(dir->i_sb, __func__,
+			ext4_warning(dir->i_sb,
 				     "dx entry: limit != node limit");
 			brelse(bh);
 			*err = ERR_BAD_DX_DIR;
@@ -494,7 +490,7 @@
 	}
 fail:
 	if (*err == ERR_BAD_DX_DIR)
-		ext4_warning(dir->i_sb, __func__,
+		ext4_warning(dir->i_sb,
 			     "Corrupt dir inode %ld, running e2fsck is "
 			     "recommended.", dir->i_ino);
 	return NULL;
@@ -947,9 +943,8 @@
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
-			ext4_error(sb, __func__, "reading directory #%lu "
-				   "offset %lu", dir->i_ino,
-				   (unsigned long)block);
+			ext4_error(sb, "reading directory #%lu offset %lu",
+				   dir->i_ino, (unsigned long)block);
 			brelse(bh);
 			goto next;
 		}
@@ -1041,7 +1036,7 @@
 		retval = ext4_htree_next_block(dir, hash, frame,
 					       frames, NULL);
 		if (retval < 0) {
-			ext4_warning(sb, __func__,
+			ext4_warning(sb,
 			     "error reading index page in directory #%lu",
 			     dir->i_ino);
 			*err = retval;
@@ -1071,14 +1066,13 @@
 		__u32 ino = le32_to_cpu(de->inode);
 		brelse(bh);
 		if (!ext4_valid_inum(dir->i_sb, ino)) {
-			ext4_error(dir->i_sb, "ext4_lookup",
-				   "bad inode number: %u", ino);
+			ext4_error(dir->i_sb, "bad inode number: %u", ino);
 			return ERR_PTR(-EIO);
 		}
 		inode = ext4_iget(dir->i_sb, ino);
 		if (unlikely(IS_ERR(inode))) {
 			if (PTR_ERR(inode) == -ESTALE) {
-				ext4_error(dir->i_sb, __func__,
+				ext4_error(dir->i_sb,
 						"deleted inode referenced: %u",
 						ino);
 				return ERR_PTR(-EIO);
@@ -1110,7 +1104,7 @@
 	brelse(bh);
 
 	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-		ext4_error(child->d_inode->i_sb, "ext4_get_parent",
+		ext4_error(child->d_inode->i_sb,
 			   "bad inode number: %u", ino);
 		return ERR_PTR(-EIO);
 	}
@@ -1410,7 +1404,7 @@
 	de = (struct ext4_dir_entry_2 *)((char *)fde +
 		ext4_rec_len_from_disk(fde->rec_len, blocksize));
 	if ((char *) de >= (((char *) root) + blocksize)) {
-		ext4_error(dir->i_sb, __func__,
+		ext4_error(dir->i_sb,
 			   "invalid rec_len for '..' in inode %lu",
 			   dir->i_ino);
 		brelse(bh);
@@ -1575,8 +1569,7 @@
 
 		if (levels && (dx_get_count(frames->entries) ==
 			       dx_get_limit(frames->entries))) {
-			ext4_warning(sb, __func__,
-				     "Directory index full!");
+			ext4_warning(sb, "Directory index full!");
 			err = -ENOSPC;
 			goto cleanup;
 		}
@@ -1766,6 +1759,8 @@
 	struct inode *inode;
 	int err, retries = 0;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1800,6 +1795,8 @@
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1837,6 +1834,8 @@
 	if (EXT4_DIR_LINK_MAX(dir))
 		return -EMLINK;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
@@ -1916,11 +1915,11 @@
 	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
 	    !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
 		if (err)
-			ext4_error(inode->i_sb, __func__,
+			ext4_error(inode->i_sb,
 				   "error %d reading directory #%lu offset 0",
 				   err, inode->i_ino);
 		else
-			ext4_warning(inode->i_sb, __func__,
+			ext4_warning(inode->i_sb,
 				     "bad directory (dir #%lu) - no data block",
 				     inode->i_ino);
 		return 1;
@@ -1931,7 +1930,7 @@
 			!le32_to_cpu(de1->inode) ||
 			strcmp(".", de->name) ||
 			strcmp("..", de1->name)) {
-		ext4_warning(inode->i_sb, "empty_dir",
+		ext4_warning(inode->i_sb,
 			     "bad directory (dir #%lu) - no `.' or `..'",
 			     inode->i_ino);
 		brelse(bh);
@@ -1949,7 +1948,7 @@
 				offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
 			if (!bh) {
 				if (err)
-					ext4_error(sb, __func__,
+					ext4_error(sb,
 						   "error %d reading directory"
 						   " #%lu offset %u",
 						   err, inode->i_ino, offset);
@@ -2020,11 +2019,18 @@
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
 		goto out_unlock;
+	/*
+	 * Due to previous errors inode may be already a part of on-disk
+	 * orphan list. If so skip on-disk list modification.
+	 */
+	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
+		(le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
+			goto mem_insert;
 
 	/* Insert this inode at the head of the on-disk orphan list... */
 	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
 	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-	err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
+	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 	if (!err)
 		err = rc;
@@ -2037,6 +2043,7 @@
 	 *
 	 * This is safe: on error we're going to ignore the orphan list
 	 * anyway on the next recovery. */
+mem_insert:
 	if (!err)
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
 
@@ -2096,7 +2103,7 @@
 		if (err)
 			goto out_brelse;
 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-		err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 	} else {
 		struct ext4_iloc iloc2;
 		struct inode *i_prev =
@@ -2136,7 +2143,9 @@
 
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
+
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2163,7 +2172,7 @@
 	if (retval)
 		goto end_rmdir;
 	if (!EXT4_DIR_LINK_EMPTY(inode))
-		ext4_warning(inode->i_sb, "ext4_rmdir",
+		ext4_warning(inode->i_sb,
 			     "empty directory has too many links (%d)",
 			     inode->i_nlink);
 	inode->i_version++;
@@ -2195,7 +2204,9 @@
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
-	vfs_dq_init(dentry->d_inode);
+	dquot_initialize(dir);
+	dquot_initialize(dentry->d_inode);
+
 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2215,7 +2226,7 @@
 		goto end_unlink;
 
 	if (!inode->i_nlink) {
-		ext4_warning(inode->i_sb, "ext4_unlink",
+		ext4_warning(inode->i_sb,
 			     "Deleting nonexistent file (%lu), %d",
 			     inode->i_ino, inode->i_nlink);
 		inode->i_nlink = 1;
@@ -2250,6 +2261,8 @@
 	if (l > dir->i_sb->s_blocksize)
 		return -ENAMETOOLONG;
 
+	dquot_initialize(dir);
+
 retry:
 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
@@ -2308,6 +2321,8 @@
 	if (inode->i_nlink >= EXT4_LINK_MAX)
 		return -EMLINK;
 
+	dquot_initialize(dir);
+
 	/*
 	 * Return -ENOENT if we've raced with unlink and i_nlink is 0.  Doing
 	 * otherwise has the potential to corrupt the orphan inode list.
@@ -2358,12 +2373,15 @@
 	struct ext4_dir_entry_2 *old_de, *new_de;
 	int retval, force_da_alloc = 0;
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_bh = new_bh = dir_bh = NULL;
 
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	if (new_dentry->d_inode)
-		vfs_dq_init(new_dentry->d_inode);
+		dquot_initialize(new_dentry->d_inode);
 	handle = ext4_journal_start(old_dir, 2 *
 					EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2462,7 +2480,7 @@
 		}
 	}
 	if (retval) {
-		ext4_warning(old_dir->i_sb, "ext4_rename",
+		ext4_warning(old_dir->i_sb,
 				"Deleting old file (%lu), %d, error=%d",
 				old_dir->i_ino, old_dir->i_nlink, retval);
 	}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3b2c554..5692c48 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -48,65 +48,54 @@
 
 	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
 	if (group != sbi->s_groups_count)
-		ext4_warning(sb, __func__,
-			     "Cannot add at group %u (only %u groups)",
+		ext4_warning(sb, "Cannot add at group %u (only %u groups)",
 			     input->group, sbi->s_groups_count);
 	else if (offset != 0)
-			ext4_warning(sb, __func__, "Last group not full");
+			ext4_warning(sb, "Last group not full");
 	else if (input->reserved_blocks > input->blocks_count / 5)
-		ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
+		ext4_warning(sb, "Reserved blocks too high (%u)",
 			     input->reserved_blocks);
 	else if (free_blocks_count < 0)
-		ext4_warning(sb, __func__, "Bad blocks count %u",
+		ext4_warning(sb, "Bad blocks count %u",
 			     input->blocks_count);
 	else if (!(bh = sb_bread(sb, end - 1)))
-		ext4_warning(sb, __func__,
-			     "Cannot read last block (%llu)",
+		ext4_warning(sb, "Cannot read last block (%llu)",
 			     end - 1);
 	else if (outside(input->block_bitmap, start, end))
-		ext4_warning(sb, __func__,
-			     "Block bitmap not in group (block %llu)",
+		ext4_warning(sb, "Block bitmap not in group (block %llu)",
 			     (unsigned long long)input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap not in group (block %llu)",
+		ext4_warning(sb, "Inode bitmap not in group (block %llu)",
 			     (unsigned long long)input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 		 outside(itend - 1, start, end))
-		ext4_warning(sb, __func__,
-			     "Inode table not in group (blocks %llu-%llu)",
+		ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
-		ext4_warning(sb, __func__,
-			     "Block bitmap same as inode bitmap (%llu)",
+		ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
 			     (unsigned long long)input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
-		ext4_warning(sb, __func__,
-			     "Block bitmap (%llu) in inode table (%llu-%llu)",
+		ext4_warning(sb, "Block bitmap (%llu) in inode table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->block_bitmap,
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap (%llu) in inode table (%llu-%llu)",
+		ext4_warning(sb, "Inode bitmap (%llu) in inode table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->inode_bitmap,
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->block_bitmap, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Block bitmap (%llu) in GDT table"
-			     " (%llu-%llu)",
+		ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
 			     (unsigned long long)input->block_bitmap,
 			     start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap (%llu) in GDT table"
-			     " (%llu-%llu)",
+		ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
 			     (unsigned long long)input->inode_bitmap,
 			     start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 		 inside(itend - 1, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Inode table (%llu-%llu) overlaps"
-			     "GDT table (%llu-%llu)",
+		ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->inode_table,
 			     itend - 1, start, metaend - 1);
 	else
@@ -364,8 +353,7 @@
 	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
 		if (le32_to_cpu(*p++) !=
 		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
-			ext4_warning(sb, __func__,
-				     "reserved GDT %llu"
+			ext4_warning(sb, "reserved GDT %llu"
 				     " missing grp %d (%llu)",
 				     blk, grp,
 				     grp *
@@ -420,8 +408,7 @@
          */
 	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
 	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
-		ext4_warning(sb, __func__,
-			"won't resize using backup superblock at %llu",
+		ext4_warning(sb, "won't resize using backup superblock at %llu",
 			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
 		return -EPERM;
 	}
@@ -444,8 +431,7 @@
 
 	data = (__le32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
-		ext4_warning(sb, __func__,
-			     "new group %u GDT block %llu not reserved",
+		ext4_warning(sb, "new group %u GDT block %llu not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
@@ -468,7 +454,7 @@
 			GFP_NOFS);
 	if (!n_group_desc) {
 		err = -ENOMEM;
-		ext4_warning(sb, __func__,
+		ext4_warning(sb,
 			      "not enough memory for %lu groups", gdb_num + 1);
 		goto exit_inode;
 	}
@@ -567,8 +553,7 @@
 	/* Get each reserved primary GDT block and verify it holds backups */
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
-			ext4_warning(sb, __func__,
-				     "reserved block %llu"
+			ext4_warning(sb, "reserved block %llu"
 				     " not at offset %ld",
 				     blk,
 				     (long)(data - (__le32 *)dind->b_data));
@@ -713,8 +698,7 @@
 	 */
 exit_err:
 	if (err) {
-		ext4_warning(sb, __func__,
-			     "can't update backup for group %u (err %d), "
+		ext4_warning(sb, "can't update backup for group %u (err %d), "
 			     "forcing fsck on next reboot", group, err);
 		sbi->s_mount_state &= ~EXT4_VALID_FS;
 		sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -753,20 +737,19 @@
 
 	if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
 					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
-		ext4_warning(sb, __func__,
-			     "Can't resize non-sparse filesystem further");
+		ext4_warning(sb, "Can't resize non-sparse filesystem further");
 		return -EPERM;
 	}
 
 	if (ext4_blocks_count(es) + input->blocks_count <
 	    ext4_blocks_count(es)) {
-		ext4_warning(sb, __func__, "blocks_count overflow");
+		ext4_warning(sb, "blocks_count overflow");
 		return -EINVAL;
 	}
 
 	if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
 	    le32_to_cpu(es->s_inodes_count)) {
-		ext4_warning(sb, __func__, "inodes_count overflow");
+		ext4_warning(sb, "inodes_count overflow");
 		return -EINVAL;
 	}
 
@@ -774,14 +757,13 @@
 		if (!EXT4_HAS_COMPAT_FEATURE(sb,
 					     EXT4_FEATURE_COMPAT_RESIZE_INODE)
 		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
-			ext4_warning(sb, __func__,
+			ext4_warning(sb,
 				     "No reserved GDT blocks, can't resize");
 			return -EPERM;
 		}
 		inode = ext4_iget(sb, EXT4_RESIZE_INO);
 		if (IS_ERR(inode)) {
-			ext4_warning(sb, __func__,
-				     "Error opening resize inode");
+			ext4_warning(sb, "Error opening resize inode");
 			return PTR_ERR(inode);
 		}
 	}
@@ -810,8 +792,7 @@
 
 	mutex_lock(&sbi->s_resize_lock);
 	if (input->group != sbi->s_groups_count) {
-		ext4_warning(sb, __func__,
-			     "multiple resizers run on filesystem!");
+		ext4_warning(sb, "multiple resizers run on filesystem!");
 		err = -EBUSY;
 		goto exit_journal;
 	}
@@ -997,13 +978,12 @@
 			" too large to resize to %llu blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
-			ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
+			ext4_warning(sb, "CONFIG_LBDAF not enabled");
 		return -EINVAL;
 	}
 
 	if (n_blocks_count < o_blocks_count) {
-		ext4_warning(sb, __func__,
-			     "can't shrink FS - resize aborted");
+		ext4_warning(sb, "can't shrink FS - resize aborted");
 		return -EBUSY;
 	}
 
@@ -1011,15 +991,14 @@
 	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
 
 	if (last == 0) {
-		ext4_warning(sb, __func__,
-			     "need to use ext2online to resize further");
+		ext4_warning(sb, "need to use ext2online to resize further");
 		return -EPERM;
 	}
 
 	add = EXT4_BLOCKS_PER_GROUP(sb) - last;
 
 	if (o_blocks_count + add < o_blocks_count) {
-		ext4_warning(sb, __func__, "blocks_count overflow");
+		ext4_warning(sb, "blocks_count overflow");
 		return -EINVAL;
 	}
 
@@ -1027,16 +1006,13 @@
 		add = n_blocks_count - o_blocks_count;
 
 	if (o_blocks_count + add < n_blocks_count)
-		ext4_warning(sb, __func__,
-			     "will only finish group (%llu"
-			     " blocks, %u new)",
+		ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
 			     o_blocks_count + add, add);
 
 	/* See if the device is actually as big as what was requested */
 	bh = sb_bread(sb, o_blocks_count + add - 1);
 	if (!bh) {
-		ext4_warning(sb, __func__,
-			     "can't read last block, resize aborted");
+		ext4_warning(sb, "can't read last block, resize aborted");
 		return -ENOSPC;
 	}
 	brelse(bh);
@@ -1047,14 +1023,13 @@
 	handle = ext4_journal_start_sb(sb, 3);
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
-		ext4_warning(sb, __func__, "error %d on journal start", err);
+		ext4_warning(sb, "error %d on journal start", err);
 		goto exit_put;
 	}
 
 	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
 	if (o_blocks_count != ext4_blocks_count(es)) {
-		ext4_warning(sb, __func__,
-			     "multiple resizers run on filesystem!");
+		ext4_warning(sb, "multiple resizers run on filesystem!");
 		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		err = -EBUSY;
@@ -1063,8 +1038,7 @@
 
 	if ((err = ext4_journal_get_write_access(handle,
 						 EXT4_SB(sb)->s_sbh))) {
-		ext4_warning(sb, __func__,
-			     "error %d on journal write access", err);
+		ext4_warning(sb, "error %d on journal write access", err);
 		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 735c20d..2b83b96 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -333,7 +333,7 @@
 			sb->s_id);
 }
 
-void ext4_error(struct super_block *sb, const char *function,
+void __ext4_error(struct super_block *sb, const char *function,
 		const char *fmt, ...)
 {
 	va_list args;
@@ -347,6 +347,42 @@
 	ext4_handle_error(sb);
 }
 
+void ext4_error_inode(const char *function, struct inode *inode,
+		      const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
+	       inode->i_sb->s_id, function, inode->i_ino, current->comm);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	ext4_handle_error(inode->i_sb);
+}
+
+void ext4_error_file(const char *function, struct file *file,
+		     const char *fmt, ...)
+{
+	va_list args;
+	struct inode *inode = file->f_dentry->d_inode;
+	char pathname[80], *path;
+
+	va_start(args, fmt);
+	path = d_path(&(file->f_path), pathname, sizeof(pathname));
+	if (!path)
+		path = "(unknown)";
+	printk(KERN_CRIT
+	       "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
+	       inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	ext4_handle_error(inode->i_sb);
+}
+
 static const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16])
 {
@@ -450,7 +486,7 @@
 	va_end(args);
 }
 
-void ext4_warning(struct super_block *sb, const char *function,
+void __ext4_warning(struct super_block *sb, const char *function,
 		  const char *fmt, ...)
 {
 	va_list args;
@@ -507,7 +543,7 @@
 	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 		return;
 
-	ext4_warning(sb, __func__,
+	ext4_warning(sb,
 		     "updating to rev %d because of new feature flag, "
 		     "running e2fsck is recommended",
 		     EXT4_DYNAMIC_REV);
@@ -708,7 +744,8 @@
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
 #endif
-	INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+	INIT_LIST_HEAD(&ei->i_completed_io_list);
+	spin_lock_init(&ei->i_completed_io_lock);
 	ei->cur_aio_dio = NULL;
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
@@ -761,6 +798,7 @@
 
 static void ext4_clear_inode(struct inode *inode)
 {
+	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
@@ -796,10 +834,10 @@
 	if (sbi->s_qf_names[GRPQUOTA])
 		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 
-	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
+	if (test_opt(sb, USRQUOTA))
 		seq_puts(seq, ",usrquota");
 
-	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
+	if (test_opt(sb, GRPQUOTA))
 		seq_puts(seq, ",grpquota");
 #endif
 }
@@ -926,6 +964,9 @@
 	if (test_opt(sb, NOLOAD))
 		seq_puts(seq, ",norecovery");
 
+	if (test_opt(sb, DIOREAD_NOLOCK))
+		seq_puts(seq, ",dioread_nolock");
+
 	ext4_show_quota_options(seq, sb);
 
 	return 0;
@@ -1012,19 +1053,9 @@
 				const char *data, size_t len, loff_t off);
 
 static const struct dquot_operations ext4_quota_operations = {
-	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
-	.reserve_space	= dquot_reserve_space,
-	.claim_space	= dquot_claim_space,
-	.release_rsv	= dquot_release_reserved_space,
 #ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
 #endif
-	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
-	.free_inode	= dquot_free_inode,
-	.transfer	= dquot_transfer,
 	.write_dquot	= ext4_write_dquot,
 	.acquire_dquot	= ext4_acquire_dquot,
 	.release_dquot	= ext4_release_dquot,
@@ -1109,6 +1140,7 @@
 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 	Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
+	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard,
 };
 
@@ -1176,6 +1208,8 @@
 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
 	{Opt_auto_da_alloc, "auto_da_alloc"},
 	{Opt_noauto_da_alloc, "noauto_da_alloc"},
+	{Opt_dioread_nolock, "dioread_nolock"},
+	{Opt_dioread_lock, "dioread_lock"},
 	{Opt_discard, "discard"},
 	{Opt_nodiscard, "nodiscard"},
 	{Opt_err, NULL},
@@ -1205,6 +1239,66 @@
 }
 
 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
+	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
+
+#ifdef CONFIG_QUOTA
+static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	char *qname;
+
+	if (sb_any_quota_loaded(sb) &&
+		!sbi->s_qf_names[qtype]) {
+		ext4_msg(sb, KERN_ERR,
+			"Cannot change journaled "
+			"quota options when quota turned on");
+		return 0;
+	}
+	qname = match_strdup(args);
+	if (!qname) {
+		ext4_msg(sb, KERN_ERR,
+			"Not enough memory for storing quotafile name");
+		return 0;
+	}
+	if (sbi->s_qf_names[qtype] &&
+		strcmp(sbi->s_qf_names[qtype], qname)) {
+		ext4_msg(sb, KERN_ERR,
+			"%s quota file already specified", QTYPE2NAME(qtype));
+		kfree(qname);
+		return 0;
+	}
+	sbi->s_qf_names[qtype] = qname;
+	if (strchr(sbi->s_qf_names[qtype], '/')) {
+		ext4_msg(sb, KERN_ERR,
+			"quotafile must be on filesystem root");
+		kfree(sbi->s_qf_names[qtype]);
+		sbi->s_qf_names[qtype] = NULL;
+		return 0;
+	}
+	set_opt(sbi->s_mount_opt, QUOTA);
+	return 1;
+}
+
+static int clear_qf_name(struct super_block *sb, int qtype)
+{
+
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	if (sb_any_quota_loaded(sb) &&
+		sbi->s_qf_names[qtype]) {
+		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
+			" when quota turned on");
+		return 0;
+	}
+	/*
+	 * The space will be released later when all options are confirmed
+	 * to be correct
+	 */
+	sbi->s_qf_names[qtype] = NULL;
+	return 1;
+}
+#endif
 
 static int parse_options(char *options, struct super_block *sb,
 			 unsigned long *journal_devnum,
@@ -1217,8 +1311,7 @@
 	int data_opt = 0;
 	int option;
 #ifdef CONFIG_QUOTA
-	int qtype, qfmt;
-	char *qname;
+	int qfmt;
 #endif
 
 	if (!options)
@@ -1229,19 +1322,31 @@
 		if (!*p)
 			continue;
 
+		/*
+		 * Initialize args struct so we know whether arg was
+		 * found; some options take optional arguments.
+		 */
+		args[0].to = args[0].from = 0;
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_bsd_df:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			clear_opt(sbi->s_mount_opt, MINIX_DF);
 			break;
 		case Opt_minix_df:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			set_opt(sbi->s_mount_opt, MINIX_DF);
+
 			break;
 		case Opt_grpid:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			set_opt(sbi->s_mount_opt, GRPID);
+
 			break;
 		case Opt_nogrpid:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			clear_opt(sbi->s_mount_opt, GRPID);
+
 			break;
 		case Opt_resuid:
 			if (match_int(&args[0], &option))
@@ -1378,14 +1483,13 @@
 			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
 		datacheck:
 			if (is_remount) {
-				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
-						!= data_opt) {
+				if (test_opt(sb, DATA_FLAGS) != data_opt) {
 					ext4_msg(sb, KERN_ERR,
 						"Cannot change data mode on remount");
 					return 0;
 				}
 			} else {
-				sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
+				clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
@@ -1397,63 +1501,22 @@
 			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
-			qtype = USRQUOTA;
-			goto set_qf_name;
+			if (!set_qf_name(sb, USRQUOTA, &args[0]))
+				return 0;
+			break;
 		case Opt_grpjquota:
-			qtype = GRPQUOTA;
-set_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    !sbi->s_qf_names[qtype]) {
-				ext4_msg(sb, KERN_ERR,
-				       "Cannot change journaled "
-				       "quota options when quota turned on");
+			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
 				return 0;
-			}
-			qname = match_strdup(&args[0]);
-			if (!qname) {
-				ext4_msg(sb, KERN_ERR,
-					"Not enough memory for "
-					"storing quotafile name");
-				return 0;
-			}
-			if (sbi->s_qf_names[qtype] &&
-			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				ext4_msg(sb, KERN_ERR,
-					"%s quota file already "
-					"specified", QTYPE2NAME(qtype));
-				kfree(qname);
-				return 0;
-			}
-			sbi->s_qf_names[qtype] = qname;
-			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				ext4_msg(sb, KERN_ERR,
-					"quotafile must be on "
-					"filesystem root");
-				kfree(sbi->s_qf_names[qtype]);
-				sbi->s_qf_names[qtype] = NULL;
-				return 0;
-			}
-			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
-			qtype = USRQUOTA;
-			goto clear_qf_name;
-		case Opt_offgrpjquota:
-			qtype = GRPQUOTA;
-clear_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    sbi->s_qf_names[qtype]) {
-				ext4_msg(sb, KERN_ERR, "Cannot change "
-					"journaled quota options when "
-					"quota turned on");
+			if (!clear_qf_name(sb, USRQUOTA))
 				return 0;
-			}
-			/*
-			 * The space will be released later when all options
-			 * are confirmed to be correct
-			 */
-			sbi->s_qf_names[qtype] = NULL;
 			break;
+		case Opt_offgrpjquota:
+			if (!clear_qf_name(sb, GRPQUOTA))
+				return 0;
+			break;
+
 		case Opt_jqfmt_vfsold:
 			qfmt = QFMT_VFS_OLD;
 			goto set_qf_format;
@@ -1518,10 +1581,11 @@
 			clear_opt(sbi->s_mount_opt, BARRIER);
 			break;
 		case Opt_barrier:
-			if (match_int(&args[0], &option)) {
-				set_opt(sbi->s_mount_opt, BARRIER);
-				break;
-			}
+			if (args[0].from) {
+				if (match_int(&args[0], &option))
+					return 0;
+			} else
+				option = 1;	/* No argument, default to 1 */
 			if (option)
 				set_opt(sbi->s_mount_opt, BARRIER);
 			else
@@ -1594,10 +1658,11 @@
 			set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 			break;
 		case Opt_auto_da_alloc:
-			if (match_int(&args[0], &option)) {
-				clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
-				break;
-			}
+			if (args[0].from) {
+				if (match_int(&args[0], &option))
+					return 0;
+			} else
+				option = 1;	/* No argument, default to 1 */
 			if (option)
 				clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
 			else
@@ -1609,6 +1674,12 @@
 		case Opt_nodiscard:
 			clear_opt(sbi->s_mount_opt, DISCARD);
 			break;
+		case Opt_dioread_nolock:
+			set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+			break;
+		case Opt_dioread_lock:
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+			break;
 		default:
 			ext4_msg(sb, KERN_ERR,
 			       "Unrecognized mount option \"%s\" "
@@ -1618,18 +1689,13 @@
 	}
 #ifdef CONFIG_QUOTA
 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
-		if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
-		     sbi->s_qf_names[USRQUOTA])
+		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
 			clear_opt(sbi->s_mount_opt, USRQUOTA);
 
-		if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
-		     sbi->s_qf_names[GRPQUOTA])
+		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
 
-		if ((sbi->s_qf_names[USRQUOTA] &&
-				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
-		    (sbi->s_qf_names[GRPQUOTA] &&
-				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
+		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
 			ext4_msg(sb, KERN_ERR, "old and new quota "
 					"format mixing");
 			return 0;
@@ -1939,7 +2005,7 @@
 		}
 
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 		if (inode->i_nlink) {
 			ext4_msg(sb, KERN_DEBUG,
 				"%s: truncating inode %lu to %lld bytes",
@@ -2432,8 +2498,11 @@
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 	if (def_mount_opts & EXT4_DEFM_DEBUG)
 		set_opt(sbi->s_mount_opt, DEBUG);
-	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
+	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
+		ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
+			"2.6.38");
 		set_opt(sbi->s_mount_opt, GRPID);
+	}
 	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sbi->s_mount_opt, NO_UID32);
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -2445,11 +2514,11 @@
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
 #endif
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
-		sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
-		sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+		set_opt(sbi->s_mount_opt, ORDERED_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
-		sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
+		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 
 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -2477,7 +2546,7 @@
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2766,7 +2835,7 @@
 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		ext4_msg(sb, KERN_ERR, "required journal recovery "
 		       "suppressed and not mounted read-only");
-		goto failed_mount4;
+		goto failed_mount_wq;
 	} else {
 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
@@ -2779,7 +2848,7 @@
 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
 		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
-		goto failed_mount4;
+		goto failed_mount_wq;
 	}
 
 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
@@ -2818,7 +2887,7 @@
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
 			ext4_msg(sb, KERN_ERR, "Journal does not support "
 			       "requested data journaling mode");
-			goto failed_mount4;
+			goto failed_mount_wq;
 		}
 	default:
 		break;
@@ -2826,13 +2895,17 @@
 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
 no_journal:
-
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
 			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
 				"its supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
+		if (test_opt(sb, DIOREAD_NOLOCK)) {
+			ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
+				"not supported with nobh mode");
+			goto failed_mount_wq;
+		}
 	}
 	EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
@@ -2897,6 +2970,18 @@
 			 "requested data journaling mode");
 		clear_opt(sbi->s_mount_opt, DELALLOC);
 	}
+	if (test_opt(sb, DIOREAD_NOLOCK)) {
+		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
+				"option - requested data journaling mode");
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		}
+		if (sb->s_blocksize < PAGE_SIZE) {
+			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
+				"option - block size is too small");
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		}
+	}
 
 	err = ext4_setup_system_zone(sb);
 	if (err) {
@@ -3360,10 +3445,9 @@
 		char nbuf[16];
 
 		errstr = ext4_decode_error(sb, j_errno, nbuf);
-		ext4_warning(sb, __func__, "Filesystem error recorded "
+		ext4_warning(sb, "Filesystem error recorded "
 			     "from previous mount: %s", errstr);
-		ext4_warning(sb, __func__, "Marking fs in need of "
-			     "filesystem check.");
+		ext4_warning(sb, "Marking fs in need of filesystem check.");
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -3514,7 +3598,7 @@
 		ext4_abort(sb, __func__, "Abort forced by user");
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
 
@@ -3708,7 +3792,7 @@
  * Process 1                         Process 2
  * ext4_create()                     quota_sync()
  *   jbd2_journal_start()                  write_dquot()
- *   vfs_dq_init()                         down(dqio_mutex)
+ *   dquot_initialize()                         down(dqio_mutex)
  *     down(dqio_mutex)                    jbd2_journal_start()
  *
  */
@@ -3917,9 +4001,7 @@
 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
 	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
-	int tocopy;
 	int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
-	size_t towrite = len;
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
@@ -3929,52 +4011,53 @@
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
-	while (towrite > 0) {
-		tocopy = sb->s_blocksize - offset < towrite ?
-				sb->s_blocksize - offset : towrite;
-		bh = ext4_bread(handle, inode, blk, 1, &err);
-		if (!bh)
-			goto out;
-		if (journal_quota) {
-			err = ext4_journal_get_write_access(handle, bh);
-			if (err) {
-				brelse(bh);
-				goto out;
-			}
-		}
-		lock_buffer(bh);
-		memcpy(bh->b_data+offset, data, tocopy);
-		flush_dcache_page(bh->b_page);
-		unlock_buffer(bh);
-		if (journal_quota)
-			err = ext4_handle_dirty_metadata(handle, NULL, bh);
-		else {
-			/* Always do at least ordered writes for quotas */
-			err = ext4_jbd2_file_inode(handle, inode);
-			mark_buffer_dirty(bh);
-		}
-		brelse(bh);
-		if (err)
-			goto out;
-		offset = 0;
-		towrite -= tocopy;
-		data += tocopy;
-		blk++;
+	/*
+	 * Since we account only one data block in transaction credits,
+	 * then it is impossible to cross a block boundary.
+	 */
+	if (sb->s_blocksize - offset < len) {
+		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because not block aligned",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
 	}
+
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
+	bh = ext4_bread(handle, inode, blk, 1, &err);
+	if (!bh)
+		goto out;
+	if (journal_quota) {
+		err = ext4_journal_get_write_access(handle, bh);
+		if (err) {
+			brelse(bh);
+			goto out;
+		}
+	}
+	lock_buffer(bh);
+	memcpy(bh->b_data+offset, data, len);
+	flush_dcache_page(bh->b_page);
+	unlock_buffer(bh);
+	if (journal_quota)
+		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+	else {
+		/* Always do at least ordered writes for quotas */
+		err = ext4_jbd2_file_inode(handle, inode);
+		mark_buffer_dirty(bh);
+	}
+	brelse(bh);
 out:
-	if (len == towrite) {
+	if (err) {
 		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
-	if (inode->i_size < off+len-towrite) {
-		i_size_write(inode, off+len-towrite);
+	if (inode->i_size < off + len) {
+		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
 	}
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	ext4_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
-	return len - towrite;
+	return len;
 }
 
 #endif
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f3a2f7e..b4c5aa8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -227,7 +227,8 @@
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
-bad_block:	ext4_error(inode->i_sb, __func__,
+bad_block:
+		ext4_error(inode->i_sb,
 			   "inode %lu: bad block %llu", inode->i_ino,
 			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
@@ -267,7 +268,7 @@
 	void *end;
 	int error;
 
-	if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR))
+	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
 		return -ENODATA;
 	error = ext4_get_inode_loc(inode, &iloc);
 	if (error)
@@ -371,7 +372,7 @@
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
 	if (ext4_xattr_check_block(bh)) {
-		ext4_error(inode->i_sb, __func__,
+		ext4_error(inode->i_sb,
 			   "inode %lu: bad block %llu", inode->i_ino,
 			   EXT4_I(inode)->i_file_acl);
 		error = -EIO;
@@ -396,7 +397,7 @@
 	void *end;
 	int error;
 
-	if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR))
+	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
 		return 0;
 	error = ext4_get_inode_loc(inode, &iloc);
 	if (error)
@@ -494,7 +495,7 @@
 		error = ext4_handle_dirty_metadata(handle, inode, bh);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
-		vfs_dq_free_block(inode, 1);
+		dquot_free_block(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
 		if (ce)
@@ -665,9 +666,8 @@
 			atomic_read(&(bs->bh->b_count)),
 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
 		if (ext4_xattr_check_block(bs->bh)) {
-			ext4_error(sb, __func__,
-				"inode %lu: bad block %llu", inode->i_ino,
-				EXT4_I(inode)->i_file_acl);
+			ext4_error(sb, "inode %lu: bad block %llu",
+				   inode->i_ino, EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
@@ -787,8 +787,8 @@
 			else {
 				/* The old block is released after updating
 				   the inode. */
-				error = -EDQUOT;
-				if (vfs_dq_alloc_block(inode, 1))
+				error = dquot_alloc_block(inode, 1);
+				if (error)
 					goto cleanup;
 				error = ext4_journal_get_write_access(handle,
 								      new_bh);
@@ -876,13 +876,12 @@
 	return error;
 
 cleanup_dquot:
-	vfs_dq_free_block(inode, 1);
+	dquot_free_block(inode, 1);
 	goto cleanup;
 
 bad_block:
-	ext4_error(inode->i_sb, __func__,
-		   "inode %lu: bad block %llu", inode->i_ino,
-		   EXT4_I(inode)->i_file_acl);
+	ext4_error(inode->i_sb, "inode %lu: bad block %llu",
+		   inode->i_ino, EXT4_I(inode)->i_file_acl);
 	goto cleanup;
 
 #undef header
@@ -908,7 +907,7 @@
 	is->s.base = is->s.first = IFIRST(header);
 	is->s.here = is->s.first;
 	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
 		error = ext4_xattr_check_names(IFIRST(header), is->s.end);
 		if (error)
 			return error;
@@ -940,10 +939,10 @@
 	header = IHDR(inode, ext4_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
 		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
-		EXT4_I(inode)->i_state |= EXT4_STATE_XATTR;
+		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
 	} else {
 		header->h_magic = cpu_to_le32(0);
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR;
+		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
 	}
 	return 0;
 }
@@ -986,8 +985,8 @@
 	if (strlen(name) > 255)
 		return -ERANGE;
 	down_write(&EXT4_I(inode)->xattr_sem);
-	no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
-	EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
+	no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
+	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 
 	error = ext4_get_inode_loc(inode, &is.iloc);
 	if (error)
@@ -997,10 +996,10 @@
 	if (error)
 		goto cleanup;
 
-	if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
 		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
 		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW;
+		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
 	}
 
 	error = ext4_xattr_ibody_find(inode, &i, &is);
@@ -1052,7 +1051,7 @@
 		ext4_xattr_update_super_block(handle, inode->i_sb);
 		inode->i_ctime = ext4_current_time(inode);
 		if (!value)
-			EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
+			ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
 		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
 		/*
 		 * The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1067,7 +1066,7 @@
 	brelse(is.iloc.bh);
 	brelse(bs.bh);
 	if (no_expand == 0)
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
+		ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
 	up_write(&EXT4_I(inode)->xattr_sem);
 	return error;
 }
@@ -1195,9 +1194,8 @@
 		if (!bh)
 			goto cleanup;
 		if (ext4_xattr_check_block(bh)) {
-			ext4_error(inode->i_sb, __func__,
-				"inode %lu: bad block %llu", inode->i_ino,
-				EXT4_I(inode)->i_file_acl);
+			ext4_error(inode->i_sb, "inode %lu: bad block %llu",
+				   inode->i_ino, EXT4_I(inode)->i_file_acl);
 			error = -EIO;
 			goto cleanup;
 		}
@@ -1302,6 +1300,8 @@
 
 		/* Remove the chosen entry from the inode */
 		error = ext4_xattr_ibody_set(handle, inode, &i, is);
+		if (error)
+			goto cleanup;
 
 		entry = IFIRST(header);
 		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
@@ -1372,16 +1372,14 @@
 		goto cleanup;
 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 	if (!bh) {
-		ext4_error(inode->i_sb, __func__,
-			"inode %lu: block %llu read error", inode->i_ino,
-			EXT4_I(inode)->i_file_acl);
+		ext4_error(inode->i_sb, "inode %lu: block %llu read error",
+			   inode->i_ino, EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-		ext4_error(inode->i_sb, __func__,
-			"inode %lu: bad block %llu", inode->i_ino,
-			EXT4_I(inode)->i_file_acl);
+		ext4_error(inode->i_sb, "inode %lu: bad block %llu",
+			   inode->i_ino, EXT4_I(inode)->i_file_acl);
 		goto cleanup;
 	}
 	ext4_xattr_release_block(handle, inode, bh);
@@ -1506,7 +1504,7 @@
 		}
 		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
-			ext4_error(inode->i_sb, __func__,
+			ext4_error(inode->i_sb,
 				"inode %lu: block %lu read error",
 				inode->i_ino, (unsigned long) ce->e_block);
 		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 14da530..fbeecdc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -577,7 +577,7 @@
 	return i_pos;
 }
 
-static int fat_write_inode(struct inode *inode, int wait)
+static int __fat_write_inode(struct inode *inode, int wait)
 {
 	struct super_block *sb = inode->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -634,9 +634,14 @@
 	return err;
 }
 
+static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int fat_sync_inode(struct inode *inode)
 {
-	return fat_write_inode(inode, 1);
+	return __fat_write_inode(inode, 1);
 }
 
 EXPORT_SYMBOL_GPL(fat_sync_inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1a7c42c..76fc4d5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -381,10 +381,10 @@
 	move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
 }
 
-static int write_inode(struct inode *inode, int sync)
+static int write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
-		return inode->i_sb->s_op->write_inode(inode, sync);
+		return inode->i_sb->s_op->write_inode(inode, wbc);
 	return 0;
 }
 
@@ -421,7 +421,6 @@
 writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct address_space *mapping = inode->i_mapping;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
 	unsigned dirty;
 	int ret;
 
@@ -439,7 +438,7 @@
 		 * We'll have another go at writing back this inode when we
 		 * completed a full scan of b_io.
 		 */
-		if (!wait) {
+		if (wbc->sync_mode != WB_SYNC_ALL) {
 			requeue_io(inode);
 			return 0;
 		}
@@ -461,15 +460,20 @@
 
 	ret = do_writepages(mapping, wbc);
 
-	/* Don't write the inode if only I_DIRTY_PAGES was set */
-	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
-		int err = write_inode(inode, wait);
+	/*
+	 * Make sure to wait on the data before writing out the metadata.
+	 * This is important for filesystems that modify metadata on data
+	 * I/O completion.
+	 */
+	if (wbc->sync_mode == WB_SYNC_ALL) {
+		int err = filemap_fdatawait(mapping);
 		if (ret == 0)
 			ret = err;
 	}
 
-	if (wait) {
-		int err = filemap_fdatawait(mapping);
+	/* Don't write the inode if only I_DIRTY_PAGES was set */
+	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+		int err = write_inode(inode, wbc);
 		if (ret == 0)
 			ret = err;
 	}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 84350e1..4e64352 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -976,102 +976,11 @@
 }
 
 /**
- * gfs2_readlinki - return the contents of a symlink
- * @ip: the symlink's inode
- * @buf: a pointer to the buffer to be filled
- * @len: a pointer to the length of @buf
- *
- * If @buf is too small, a piece of memory is kmalloc()ed and needs
- * to be freed by the caller.
- *
- * Returns: errno
- */
-
-static int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
-{
-	struct gfs2_holder i_gh;
-	struct buffer_head *dibh;
-	unsigned int x;
-	int error;
-
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
-	error = gfs2_glock_nq(&i_gh);
-	if (error) {
-		gfs2_holder_uninit(&i_gh);
-		return error;
-	}
-
-	if (!ip->i_disksize) {
-		gfs2_consist_inode(ip);
-		error = -EIO;
-		goto out;
-	}
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		goto out;
-
-	x = ip->i_disksize + 1;
-	if (x > *len) {
-		*buf = kmalloc(x, GFP_NOFS);
-		if (!*buf) {
-			error = -ENOMEM;
-			goto out_brelse;
-		}
-	}
-
-	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
-	*len = x;
-
-out_brelse:
-	brelse(dibh);
-out:
-	gfs2_glock_dq_uninit(&i_gh);
-	return error;
-}
-
-/**
- * gfs2_readlink - Read the value of a symlink
- * @dentry: the symlink
- * @buf: the buffer to read the symlink data into
- * @size: the size of the buffer
- *
- * Returns: errno
- */
-
-static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
-			 int user_size)
-{
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-	char array[GFS2_FAST_NAME_SIZE], *buf = array;
-	unsigned int len = GFS2_FAST_NAME_SIZE;
-	int error;
-
-	error = gfs2_readlinki(ip, &buf, &len);
-	if (error)
-		return error;
-
-	if (user_size > len - 1)
-		user_size = len - 1;
-
-	if (copy_to_user(user_buf, buf, user_size))
-		error = -EFAULT;
-	else
-		error = user_size;
-
-	if (buf != array)
-		kfree(buf);
-
-	return error;
-}
-
-/**
  * gfs2_follow_link - Follow a symbolic link
  * @dentry: The dentry of the link
  * @nd: Data that we pass to vfs_follow_link()
  *
- * This can handle symlinks of any size. It is optimised for symlinks
- * under GFS2_FAST_NAME_SIZE.
+ * This can handle symlinks of any size.
  *
  * Returns: 0 on success or error code
  */
@@ -1079,19 +988,50 @@
 static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-	char array[GFS2_FAST_NAME_SIZE], *buf = array;
-	unsigned int len = GFS2_FAST_NAME_SIZE;
+	struct gfs2_holder i_gh;
+	struct buffer_head *dibh;
+	unsigned int x;
+	char *buf;
 	int error;
 
-	error = gfs2_readlinki(ip, &buf, &len);
-	if (!error) {
-		error = vfs_follow_link(nd, buf);
-		if (buf != array)
-			kfree(buf);
-	} else
-		path_put(&nd->path);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+	error = gfs2_glock_nq(&i_gh);
+	if (error) {
+		gfs2_holder_uninit(&i_gh);
+		nd_set_link(nd, ERR_PTR(error));
+		return NULL;
+	}
 
-	return ERR_PTR(error);
+	if (!ip->i_disksize) {
+		gfs2_consist_inode(ip);
+		buf = ERR_PTR(-EIO);
+		goto out;
+	}
+
+	error = gfs2_meta_inode_buffer(ip, &dibh);
+	if (error) {
+		buf = ERR_PTR(error);
+		goto out;
+	}
+
+	x = ip->i_disksize + 1;
+	buf = kmalloc(x, GFP_NOFS);
+	if (!buf)
+		buf = ERR_PTR(-ENOMEM);
+	else
+		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
+	brelse(dibh);
+out:
+	gfs2_glock_dq_uninit(&i_gh);
+	nd_set_link(nd, buf);
+	return NULL;
+}
+
+static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		kfree(s);
 }
 
 /**
@@ -1426,8 +1366,9 @@
 };
 
 const struct inode_operations gfs2_symlink_iops = {
-	.readlink = gfs2_readlink,
+	.readlink = generic_readlink,
 	.follow_link = gfs2_follow_link,
+	.put_link = gfs2_put_link,
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e3bf6ea..6dbcbad 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1083,7 +1083,7 @@
 	}
 }
 
-int gfs2_quota_sync(struct super_block *sb, int type)
+int gfs2_quota_sync(struct super_block *sb, int type, int wait)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_data **qda;
@@ -1127,6 +1127,11 @@
 	return error;
 }
 
+static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
+{
+	return gfs2_quota_sync(sb, type, 0);
+}
+
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
 {
 	struct gfs2_quota_data *qd;
@@ -1382,7 +1387,7 @@
 					   &tune->gt_statfs_quantum);
 
 		/* Update quota file */
-		quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
+		quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t,
 				   &quotad_timeo, &tune->gt_quota_quantum);
 
 		/* Check for & recover partially truncated inodes */
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e271fa0..195f60c 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -25,7 +25,7 @@
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      u32 uid, u32 gid);
 
-extern int gfs2_quota_sync(struct super_block *sb, int type);
+extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
 extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
 extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index e5e2262..50aac60 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -22,6 +22,7 @@
 #include <linux/crc32.h>
 #include <linux/time.h>
 #include <linux/wait.h>
+#include <linux/writeback.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -711,7 +712,7 @@
  * Returns: errno
  */
 
-static int gfs2_write_inode(struct inode *inode, int sync)
+static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -745,7 +746,7 @@
 do_unlock:
 	gfs2_glock_dq_uninit(&gh);
 do_flush:
-	if (sync != 0)
+	if (wbc->sync_mode == WB_SYNC_ALL)
 		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
 	return ret;
 }
@@ -763,7 +764,7 @@
 	int error;
 
 	flush_workqueue(gfs2_delete_workqueue);
-	gfs2_quota_sync(sdp->sd_vfs, 0);
+	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a0db1c9..b5f1a46 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -167,7 +167,7 @@
 	if (simple_strtol(buf, NULL, 0) != 1)
 		return -EINVAL;
 
-	gfs2_quota_sync(sdp->sd_vfs, 0);
+	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
 	return len;
 }
 
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 052387e..fe35e3b 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -188,7 +188,7 @@
 
 extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
-extern int hfs_write_inode(struct inode *, int);
+extern int hfs_write_inode(struct inode *, struct writeback_control *);
 extern int hfs_inode_setattr(struct dentry *, struct iattr *);
 extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
 			__be32 log_size, __be32 phys_size, u32 clump_size);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1cbff2..14f5cb1 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -381,7 +381,7 @@
 					 HFS_SB(inode->i_sb)->alloc_blksz);
 }
 
-int hfs_write_inode(struct inode *inode, int unused)
+int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct inode *main_inode = inode;
 	struct hfs_find_data fd;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 43022f3..74b473a 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -87,7 +87,8 @@
 	return ERR_PTR(err);
 }
 
-static int hfsplus_write_inode(struct inode *inode, int unused)
+static int hfsplus_write_inode(struct inode *inode,
+		struct writeback_control *wbc)
 {
 	struct hfsplus_vh *vhdr;
 	int ret = 0;
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 1aa88c4..6a2f04b 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -353,7 +353,7 @@
 }
 
 int hpfs_ea_write(struct super_block *s, secno a, int ano, unsigned pos,
-	     unsigned len, char *buf)
+	     unsigned len, const char *buf)
 {
 	struct buffer_head *bh;
 	char *data;
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 940d6d1..67d9d36 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -20,8 +20,8 @@
 
 	if (l == 1) if (qstr->name[0]=='.') goto x;
 	if (l == 2) if (qstr->name[0]=='.' || qstr->name[1]=='.') goto x;
-	hpfs_adjust_length((char *)qstr->name, &l);
-	/*if (hpfs_chk_name((char *)qstr->name,&l))*/
+	hpfs_adjust_length(qstr->name, &l);
+	/*if (hpfs_chk_name(qstr->name,&l))*/
 		/*return -ENAMETOOLONG;*/
 		/*return -ENOENT;*/
 	x:
@@ -38,14 +38,16 @@
 {
 	unsigned al=a->len;
 	unsigned bl=b->len;
-	hpfs_adjust_length((char *)a->name, &al);
-	/*hpfs_adjust_length((char *)b->name, &bl);*/
+	hpfs_adjust_length(a->name, &al);
+	/*hpfs_adjust_length(b->name, &bl);*/
 	/* 'a' is the qstr of an already existing dentry, so the name
 	 * must be valid. 'b' must be validated first.
 	 */
 
-	if (hpfs_chk_name((char *)b->name, &bl)) return 1;
-	if (hpfs_compare_names(dentry->d_sb, (char *)a->name, al, (char *)b->name, bl, 0)) return 1;
+	if (hpfs_chk_name(b->name, &bl))
+		return 1;
+	if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0))
+		return 1;
 	return 0;
 }
 
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8865c94..26e3964 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -59,7 +59,7 @@
 	struct hpfs_dirent *de;
 	int lc;
 	long old_pos;
-	char *tempname;
+	unsigned char *tempname;
 	int c1, c2 = 0;
 	int ret = 0;
 
@@ -158,11 +158,11 @@
 		tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
 		if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) {
 			filp->f_pos = old_pos;
-			if (tempname != (char *)de->name) kfree(tempname);
+			if (tempname != de->name) kfree(tempname);
 			hpfs_brelse4(&qbh);
 			goto out;
 		}
-		if (tempname != (char *)de->name) kfree(tempname);
+		if (tempname != de->name) kfree(tempname);
 		hpfs_brelse4(&qbh);
 	}
 out:
@@ -187,7 +187,7 @@
 
 struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
@@ -197,7 +197,7 @@
 	struct hpfs_inode_info *hpfs_result;
 
 	lock_kernel();
-	if ((err = hpfs_chk_name((char *)name, &len))) {
+	if ((err = hpfs_chk_name(name, &len))) {
 		if (err == -ENAMETOOLONG) {
 			unlock_kernel();
 			return ERR_PTR(-ENAMETOOLONG);
@@ -209,7 +209,7 @@
 	 * '.' and '..' will never be passed here.
 	 */
 
-	de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *) name, len, NULL, &qbh);
+	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, NULL, &qbh);
 
 	/*
 	 * This is not really a bailout, just means file not found.
@@ -250,7 +250,7 @@
 	hpfs_result = hpfs_i(result);
 	if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
 
-	hpfs_decide_conv(result, (char *)name, len);
+	hpfs_decide_conv(result, name, len);
 
 	if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
 		hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index fe83c2b..9b2ffad 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -158,7 +158,8 @@
 
 /* Add an entry to dnode and don't care if it grows over 2048 bytes */
 
-struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, unsigned char *name,
+struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
+				const unsigned char *name,
 				unsigned namelen, secno down_ptr)
 {
 	struct hpfs_dirent *de;
@@ -223,7 +224,7 @@
 /* Add an entry to dnode and do dnode splitting if required */
 
 static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
-			     unsigned char *name, unsigned namelen,
+			     const unsigned char *name, unsigned namelen,
 			     struct hpfs_dirent *new_de, dnode_secno down_ptr)
 {
 	struct quad_buffer_head qbh, qbh1, qbh2;
@@ -231,7 +232,7 @@
 	dnode_secno adno, rdno;
 	struct hpfs_dirent *de;
 	struct hpfs_dirent nde;
-	char *nname;
+	unsigned char *nname;
 	int h;
 	int pos;
 	struct buffer_head *bh;
@@ -305,7 +306,9 @@
 		pos++;
 	}
 	copy_de(new_de = &nde, de);
-	memcpy(name = nname, de->name, namelen = de->namelen);
+	memcpy(nname, de->name, de->namelen);
+	name = nname;
+	namelen = de->namelen;
 	for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | pos, 4);
 	down_ptr = adno;
 	set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
@@ -368,7 +371,8 @@
  * I hope, now it's finally bug-free.
  */
 
-int hpfs_add_dirent(struct inode *i, unsigned char *name, unsigned namelen,
+int hpfs_add_dirent(struct inode *i,
+		    const unsigned char *name, unsigned namelen,
 		    struct hpfs_dirent *new_de, int cdepth)
 {
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
@@ -897,7 +901,8 @@
 
 /* Find a dirent in tree */
 
-struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno, char *name, unsigned len,
+struct hpfs_dirent *map_dirent(struct inode *inode, dnode_secno dno,
+			       const unsigned char *name, unsigned len,
 			       dnode_secno *dd, struct quad_buffer_head *qbh)
 {
 	struct dnode *dnode;
@@ -988,8 +993,8 @@
 struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
 				     struct fnode *f, struct quad_buffer_head *qbh)
 {
-	char *name1;
-	char *name2;
+	unsigned char *name1;
+	unsigned char *name2;
 	int name1len, name2len;
 	struct dnode *d;
 	dnode_secno dno, downd;
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 547a838..45e53d9 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -62,8 +62,8 @@
 	return ret;
 }
 
-static void set_indirect_ea(struct super_block *s, int ano, secno a, char *data,
-			    int size)
+static void set_indirect_ea(struct super_block *s, int ano, secno a,
+			    const char *data, int size)
 {
 	hpfs_ea_write(s, a, ano, 0, size, data);
 }
@@ -186,7 +186,8 @@
  * This driver can't change sizes of eas ('cause I just don't need it).
  */
 
-void hpfs_set_ea(struct inode *inode, struct fnode *fnode, char *key, char *data, int size)
+void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
+		 const char *data, int size)
 {
 	fnode_secno fno = inode->i_ino;
 	struct super_block *s = inode->i_sb;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 701ca54..97bf738 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -215,7 +215,7 @@
 secno hpfs_add_sector_to_btree(struct super_block *, secno, int, unsigned);
 void hpfs_remove_btree(struct super_block *, struct bplus_header *);
 int hpfs_ea_read(struct super_block *, secno, int, unsigned, unsigned, char *);
-int hpfs_ea_write(struct super_block *, secno, int, unsigned, unsigned, char *);
+int hpfs_ea_write(struct super_block *, secno, int, unsigned, unsigned, const char *);
 void hpfs_ea_remove(struct super_block *, secno, int, unsigned);
 void hpfs_truncate_btree(struct super_block *, secno, int, unsigned);
 void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
@@ -244,13 +244,17 @@
 
 void hpfs_add_pos(struct inode *, loff_t *);
 void hpfs_del_pos(struct inode *, loff_t *);
-struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, unsigned char *, unsigned, secno);
-int hpfs_add_dirent(struct inode *, unsigned char *, unsigned, struct hpfs_dirent *, int);
+struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
+				const unsigned char *, unsigned, secno);
+int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
+		    struct hpfs_dirent *, int);
 int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
 void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
 dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
 struct hpfs_dirent *map_pos_dirent(struct inode *, loff_t *, struct quad_buffer_head *);
-struct hpfs_dirent *map_dirent(struct inode *, dnode_secno, char *, unsigned, dnode_secno *, struct quad_buffer_head *);
+struct hpfs_dirent *map_dirent(struct inode *, dnode_secno,
+			       const unsigned char *, unsigned, dnode_secno *,
+			       struct quad_buffer_head *);
 void hpfs_remove_dtree(struct super_block *, dnode_secno);
 struct hpfs_dirent *map_fnode_dirent(struct super_block *, fnode_secno, struct fnode *, struct quad_buffer_head *);
 
@@ -259,7 +263,8 @@
 void hpfs_ea_ext_remove(struct super_block *, secno, int, unsigned);
 int hpfs_read_ea(struct super_block *, struct fnode *, char *, char *, int);
 char *hpfs_get_ea(struct super_block *, struct fnode *, char *, int *);
-void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
+void hpfs_set_ea(struct inode *, struct fnode *, const char *,
+		 const char *, int);
 
 /* file.c */
 
@@ -282,7 +287,7 @@
 
 unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *);
 unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *);
-char *hpfs_load_code_page(struct super_block *, secno);
+unsigned char *hpfs_load_code_page(struct super_block *, secno);
 secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp);
 struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **);
 struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **);
@@ -292,12 +297,13 @@
 /* name.c */
 
 unsigned char hpfs_upcase(unsigned char *, unsigned char);
-int hpfs_chk_name(unsigned char *, unsigned *);
-char *hpfs_translate_name(struct super_block *, unsigned char *, unsigned, int, int);
-int hpfs_compare_names(struct super_block *, unsigned char *, unsigned, unsigned char *, unsigned, int);
-int hpfs_is_name_long(unsigned char *, unsigned);
-void hpfs_adjust_length(unsigned char *, unsigned *);
-void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
+int hpfs_chk_name(const unsigned char *, unsigned *);
+unsigned char *hpfs_translate_name(struct super_block *, unsigned char *, unsigned, int, int);
+int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
+		       const unsigned char *, unsigned, int);
+int hpfs_is_name_long(const unsigned char *, unsigned);
+void hpfs_adjust_length(const unsigned char *, unsigned *);
+void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
 
 /* namei.c */
 
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index fe703ae..ff90aff 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -46,7 +46,7 @@
 	struct fnode *fnode;
 	struct super_block *sb = i->i_sb;
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
-	unsigned char *ea;
+	void *ea;
 	int ea_size;
 
 	if (!(fnode = hpfs_map_fnode(sb, i->i_ino, &bh))) {
@@ -112,7 +112,7 @@
 		}
 	}
 	if (fnode->dirflag) {
-		unsigned n_dnodes, n_subdirs;
+		int n_dnodes, n_subdirs;
 		i->i_mode |= S_IFDIR;
 		i->i_op = &hpfs_dir_iops;
 		i->i_fop = &hpfs_dir_ops;
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index c472458..840d033 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -35,7 +35,7 @@
  * lowercasing table
  */
 
-char *hpfs_load_code_page(struct super_block *s, secno cps)
+unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
 {
 	struct buffer_head *bh;
 	secno cpds;
@@ -71,7 +71,7 @@
 		brelse(bh);
 		return NULL;
 	}
-	ptr = (char *)cpd + cpd->offs[cpi] + 6;
+	ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6;
 	if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
 		printk("HPFS: out of memory for code page table\n");
 		brelse(bh);
@@ -217,7 +217,7 @@
 	if ((dnode = hpfs_map_4sectors(s, secno, qbh, DNODE_RD_AHEAD)))
 		if (hpfs_sb(s)->sb_chk) {
 			unsigned p, pp = 0;
-			unsigned char *d = (char *)dnode;
+			unsigned char *d = (unsigned char *)dnode;
 			int b = 0;
 			if (dnode->magic != DNODE_MAGIC) {
 				hpfs_error(s, "bad magic on dnode %08x", secno);
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index 1f4a964..f24736d 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,16 +8,16 @@
 
 #include "hpfs_fn.h"
 
-static char *text_postfix[]={
+static const char *text_postfix[]={
 ".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
 ".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
 ".RC", ".TEX", ".TXT", ".Y", ""};
 
-static char *text_prefix[]={
+static const char *text_prefix[]={
 "AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
 "MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
 
-void hpfs_decide_conv(struct inode *inode, unsigned char *name, unsigned len)
+void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
 {
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
 	int i;
@@ -71,7 +71,7 @@
 	return dir[a];
 }
 
-int hpfs_chk_name(unsigned char *name, unsigned *len)
+int hpfs_chk_name(const unsigned char *name, unsigned *len)
 {
 	int i;
 	if (*len > 254) return -ENAMETOOLONG;
@@ -83,10 +83,10 @@
 	return 0;
 }
 
-char *hpfs_translate_name(struct super_block *s, unsigned char *from,
+unsigned char *hpfs_translate_name(struct super_block *s, unsigned char *from,
 			  unsigned len, int lc, int lng)
 {
-	char *to;
+	unsigned char *to;
 	int i;
 	if (hpfs_sb(s)->sb_chk >= 2) if (hpfs_is_name_long(from, len) != lng) {
 		printk("HPFS: Long name flag mismatch - name ");
@@ -103,8 +103,9 @@
 	return to;
 }
 
-int hpfs_compare_names(struct super_block *s, unsigned char *n1, unsigned l1,
-		       unsigned char *n2, unsigned l2, int last)
+int hpfs_compare_names(struct super_block *s,
+		       const unsigned char *n1, unsigned l1,
+		       const unsigned char *n2, unsigned l2, int last)
 {
 	unsigned l = l1 < l2 ? l1 : l2;
 	unsigned i;
@@ -120,7 +121,7 @@
 	return 0;
 }
 
-int hpfs_is_name_long(unsigned char *name, unsigned len)
+int hpfs_is_name_long(const unsigned char *name, unsigned len)
 {
 	int i,j;
 	for (i = 0; i < len && name[i] != '.'; i++)
@@ -134,7 +135,7 @@
 
 /* OS/2 clears dots and spaces at the end of file name, so we have to */
 
-void hpfs_adjust_length(unsigned char *name, unsigned *len)
+void hpfs_adjust_length(const unsigned char *name, unsigned *len)
 {
 	if (!*len) return;
 	if (*len == 1 && name[0] == '.') return;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 82b9c4b..11c2b40 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -11,7 +11,7 @@
 
 static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct quad_buffer_head qbh0;
 	struct buffer_head *bh;
@@ -24,7 +24,7 @@
 	int r;
 	struct hpfs_dirent dee;
 	int err;
-	if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err;
+	if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
 	lock_kernel();
 	err = -ENOSPC;
 	fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
@@ -62,7 +62,7 @@
 		result->i_mode &= ~0222;
 
 	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee, 0);
 	if (r == 1)
 		goto bail3;
 	if (r == -1) {
@@ -121,7 +121,7 @@
 
 static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct inode *result = NULL;
 	struct buffer_head *bh;
@@ -130,7 +130,7 @@
 	int r;
 	struct hpfs_dirent dee;
 	int err;
-	if ((err = hpfs_chk_name((char *)name, &len)))
+	if ((err = hpfs_chk_name(name, &len)))
 		return err==-ENOENT ? -EINVAL : err;
 	lock_kernel();
 	err = -ENOSPC;
@@ -155,7 +155,7 @@
 	result->i_op = &hpfs_file_iops;
 	result->i_fop = &hpfs_file_ops;
 	result->i_nlink = 1;
-	hpfs_decide_conv(result, (char *)name, len);
+	hpfs_decide_conv(result, name, len);
 	hpfs_i(result)->i_parent_dir = dir->i_ino;
 	result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date);
 	result->i_ctime.tv_nsec = 0;
@@ -170,7 +170,7 @@
 	hpfs_i(result)->mmu_private = 0;
 
 	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -211,7 +211,7 @@
 
 static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct buffer_head *bh;
 	struct fnode *fnode;
@@ -220,7 +220,7 @@
 	struct hpfs_dirent dee;
 	struct inode *result = NULL;
 	int err;
-	if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err;
+	if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
 	if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM;
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -256,7 +256,7 @@
 	init_special_inode(result, mode, rdev);
 
 	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -289,7 +289,7 @@
 
 static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct buffer_head *bh;
 	struct fnode *fnode;
@@ -298,7 +298,7 @@
 	struct hpfs_dirent dee;
 	struct inode *result;
 	int err;
-	if ((err = hpfs_chk_name((char *)name, &len))) return err==-ENOENT ? -EINVAL : err;
+	if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
 	lock_kernel();
 	if (hpfs_sb(dir->i_sb)->sb_eas < 2) {
 		unlock_kernel();
@@ -335,7 +335,7 @@
 	result->i_data.a_ops = &hpfs_symlink_aops;
 
 	mutex_lock(&hpfs_i(dir)->i_mutex);
-	r = hpfs_add_dirent(dir, (char *)name, len, &dee, 0);
+	r = hpfs_add_dirent(dir, name, len, &dee, 0);
 	if (r == 1)
 		goto bail2;
 	if (r == -1) {
@@ -345,7 +345,7 @@
 	fnode->len = len;
 	memcpy(fnode->name, name, len > 15 ? 15 : len);
 	fnode->up = dir->i_ino;
-	hpfs_set_ea(result, fnode, "SYMLINK", (char *)symlink, strlen(symlink));
+	hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
 	mark_buffer_dirty(bh);
 	brelse(bh);
 
@@ -369,7 +369,7 @@
 
 static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
@@ -381,12 +381,12 @@
 	int err;
 
 	lock_kernel();
-	hpfs_adjust_length((char *)name, &len);
+	hpfs_adjust_length(name, &len);
 again:
 	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
 	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
-	de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh);
+	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
 		goto out;
 
@@ -413,22 +413,25 @@
 
 		mutex_unlock(&hpfs_i(dir)->i_mutex);
 		mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
-		d_drop(dentry);
-		spin_lock(&dentry->d_lock);
-		if (atomic_read(&dentry->d_count) > 1 ||
-		    generic_permission(inode, MAY_WRITE, NULL) ||
+		dentry_unhash(dentry);
+		if (!d_unhashed(dentry)) {
+			dput(dentry);
+			unlock_kernel();
+			return -ENOSPC;
+		}
+		if (generic_permission(inode, MAY_WRITE, NULL) ||
 		    !S_ISREG(inode->i_mode) ||
 		    get_write_access(inode)) {
-			spin_unlock(&dentry->d_lock);
 			d_rehash(dentry);
+			dput(dentry);
 		} else {
 			struct iattr newattrs;
-			spin_unlock(&dentry->d_lock);
 			/*printk("HPFS: truncating file before delete.\n");*/
 			newattrs.ia_size = 0;
 			newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
 			err = notify_change(dentry, &newattrs);
 			put_write_access(inode);
+			dput(dentry);
 			if (!err)
 				goto again;
 		}
@@ -451,7 +454,7 @@
 
 static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	unsigned len = dentry->d_name.len;
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
@@ -462,12 +465,12 @@
 	int err;
 	int r;
 
-	hpfs_adjust_length((char *)name, &len);
+	hpfs_adjust_length(name, &len);
 	lock_kernel();
 	mutex_lock(&hpfs_i(inode)->i_parent_mutex);
 	mutex_lock(&hpfs_i(dir)->i_mutex);
 	err = -ENOENT;
-	de = map_dirent(dir, hpfs_i(dir)->i_dno, (char *)name, len, &dno, &qbh);
+	de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
 	if (!de)
 		goto out;
 
@@ -546,10 +549,10 @@
 static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		struct inode *new_dir, struct dentry *new_dentry)
 {
-	char *old_name = (char *)old_dentry->d_name.name;
-	int old_len = old_dentry->d_name.len;
-	char *new_name = (char *)new_dentry->d_name.name;
-	int new_len = new_dentry->d_name.len;
+	const unsigned char *old_name = old_dentry->d_name.name;
+	unsigned old_len = old_dentry->d_name.len;
+	const unsigned char *new_name = new_dentry->d_name.name;
+	unsigned new_len = new_dentry->d_name.len;
 	struct inode *i = old_dentry->d_inode;
 	struct inode *new_inode = new_dentry->d_inode;
 	struct quad_buffer_head qbh, qbh1;
@@ -560,9 +563,9 @@
 	struct buffer_head *bh;
 	struct fnode *fnode;
 	int err;
-	if ((err = hpfs_chk_name((char *)new_name, &new_len))) return err;
+	if ((err = hpfs_chk_name(new_name, &new_len))) return err;
 	err = 0;
-	hpfs_adjust_length((char *)old_name, &old_len);
+	hpfs_adjust_length(old_name, &old_len);
 
 	lock_kernel();
 	/* order doesn't matter, due to VFS exclusion */
@@ -579,7 +582,7 @@
 		goto end1;
 	}
 
-	if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, (char *)old_name, old_len, &dno, &qbh))) {
+	if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
 		hpfs_error(i->i_sb, "lookup succeeded but map dirent failed");
 		err = -ENOENT;
 		goto end1;
@@ -590,7 +593,7 @@
 	if (new_inode) {
 		int r;
 		if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 1)) != 2) {
-			if ((nde = map_dirent(new_dir, hpfs_i(new_dir)->i_dno, (char *)new_name, new_len, NULL, &qbh1))) {
+			if ((nde = map_dirent(new_dir, hpfs_i(new_dir)->i_dno, new_name, new_len, NULL, &qbh1))) {
 				clear_nlink(new_inode);
 				copy_de(nde, &de);
 				memcpy(nde->name, new_name, new_len);
@@ -618,7 +621,7 @@
 	}
 	
 	if (new_dir == old_dir)
-		if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, (char *)old_name, old_len, &dno, &qbh))) {
+		if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
 			hpfs_unlock_creation(i->i_sb);
 			hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
 			err = -ENOENT;
@@ -648,7 +651,7 @@
 		brelse(bh);
 	}
 	hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
-	hpfs_decide_conv(i, (char *)new_name, new_len);
+	hpfs_decide_conv(i, new_name, new_len);
 end1:
 	if (old_dir != new_dir)
 		mutex_unlock(&hpfs_i(new_dir)->i_mutex);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 7239efc..2e4dfa8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -718,7 +718,7 @@
 	struct vfsmount *proc_mnt;
 	int err = -ENOENT;
 
-	proc_mnt = do_kern_mount("proc", 0, "proc", NULL);
+	proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
 	if (IS_ERR(proc_mnt))
 		goto out;
 
diff --git a/fs/inode.c b/fs/inode.c
index 03dfeb2..407bf39 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -8,7 +8,6 @@
 #include <linux/mm.h>
 #include <linux/dcache.h>
 #include <linux/init.h>
-#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/writeback.h>
 #include <linux/module.h>
@@ -314,7 +313,6 @@
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
-	vfs_dq_drop(inode);
 	if (inode->i_sb->s_op->clear_inode)
 		inode->i_sb->s_op->clear_inode(inode);
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
@@ -1211,8 +1209,6 @@
 
 	if (op->delete_inode) {
 		void (*delete)(struct inode *) = op->delete_inode;
-		if (!is_bad_inode(inode))
-			vfs_dq_init(inode);
 		/* Filesystems implementing their own
 		 * s_op->delete_inode are required to call
 		 * truncate_inode_pages and clear_inode()
diff --git a/fs/internal.h b/fs/internal.h
index e96a166..8a03a54 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -70,6 +70,8 @@
 
 extern void __init mnt_init(void);
 
+extern spinlock_t vfsmount_lock;
+
 /*
  * fs_struct.c
  */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 4bd8825..2c90e3e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -862,12 +862,12 @@
 		/* A buffer which has been freed while still being
 		 * journaled by a previous transaction may end up still
 		 * being dirty here, but we want to avoid writing back
-		 * that buffer in the future now that the last use has
-		 * been committed.  That's not only a performance gain,
-		 * it also stops aliasing problems if the buffer is left
-		 * behind for writeback and gets reallocated for another
+		 * that buffer in the future after the "add to orphan"
+		 * operation been committed,  That's not only a performance
+		 * gain, it also stops aliasing problems if the buffer is
+		 * left behind for writeback and gets reallocated for another
 		 * use in a different page. */
-		if (buffer_freed(bh)) {
+		if (buffer_freed(bh) && !jh->b_next_transaction) {
 			clear_buffer_freed(bh);
 			clear_buffer_jbddirty(bh);
 		}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 006f9ad..99e9fea 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1864,6 +1864,21 @@
 	if (!jh)
 		goto zap_buffer_no_jh;
 
+	/*
+	 * We cannot remove the buffer from checkpoint lists until the
+	 * transaction adding inode to orphan list (let's call it T)
+	 * is committed.  Otherwise if the transaction changing the
+	 * buffer would be cleaned from the journal before T is
+	 * committed, a crash will cause that the correct contents of
+	 * the buffer will be lost.  On the other hand we have to
+	 * clear the buffer dirty bit at latest at the moment when the
+	 * transaction marking the buffer as freed in the filesystem
+	 * structures is committed because from that moment on the
+	 * buffer can be reallocated and used by a different page.
+	 * Since the block hasn't been freed yet but the inode has
+	 * already been added to orphan list, it is safe for us to add
+	 * the buffer to BJ_Forget list of the newest transaction.
+	 */
 	transaction = jh->b_transaction;
 	if (transaction == NULL) {
 		/* First case: not on any transaction.  If it
@@ -1929,16 +1944,15 @@
 			goto zap_buffer;
 		}
 		/*
-		 * If it is committing, we simply cannot touch it.  We
-		 * can remove it's next_transaction pointer from the
-		 * running transaction if that is set, but nothing
-		 * else. */
+		 * The buffer is committing, we simply cannot touch
+		 * it. So we just set j_next_transaction to the
+		 * running transaction (if there is one) and mark
+		 * buffer as freed so that commit code knows it should
+		 * clear dirty bits when it is done with the buffer.
+		 */
 		set_buffer_freed(bh);
-		if (jh->b_next_transaction) {
-			J_ASSERT(jh->b_next_transaction ==
-					journal->j_running_transaction);
-			jh->b_next_transaction = NULL;
-		}
+		if (journal->j_running_transaction && buffer_jbddirty(bh))
+			jh->b_next_transaction = journal->j_running_transaction;
 		journal_put_journal_head(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
@@ -2120,7 +2134,7 @@
  */
 void __journal_refile_buffer(struct journal_head *jh)
 {
-	int was_dirty;
+	int was_dirty, jlist;
 	struct buffer_head *bh = jh2bh(jh);
 
 	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -2142,8 +2156,13 @@
 	__journal_temp_unlink_buffer(jh);
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
-	__journal_file_buffer(jh, jh->b_transaction,
-				jh->b_modified ? BJ_Metadata : BJ_Reserved);
+	if (buffer_freed(bh))
+		jlist = BJ_Forget;
+	else if (jh->b_modified)
+		jlist = BJ_Metadata;
+	else
+		jlist = BJ_Reserved;
+	__journal_file_buffer(jh, jh->b_transaction, jlist);
 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
 	if (was_dirty)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 8868493..30beb11 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -507,6 +507,7 @@
 	if (blocknr < journal->j_tail)
 		freed = freed + journal->j_last - journal->j_first;
 
+	trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
 	jbd_debug(1,
 		  "Cleaning journal tail from %d to %d (offset %lu), "
 		  "freeing %lu\n",
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1bc74b6..671da7f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -883,8 +883,7 @@
 		spin_unlock(&journal->j_list_lock);
 		bh = jh2bh(jh);
 		jbd_lock_bh_state(bh);
-		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction ||
-			jh->b_transaction == journal->j_running_transaction);
+		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
 
 		/*
 		 * If there is undo-protected committed data against
@@ -930,12 +929,12 @@
 		/* A buffer which has been freed while still being
 		 * journaled by a previous transaction may end up still
 		 * being dirty here, but we want to avoid writing back
-		 * that buffer in the future now that the last use has
-		 * been committed.  That's not only a performance gain,
-		 * it also stops aliasing problems if the buffer is left
-		 * behind for writeback and gets reallocated for another
+		 * that buffer in the future after the "add to orphan"
+		 * operation been committed,  That's not only a performance
+		 * gain, it also stops aliasing problems if the buffer is
+		 * left behind for writeback and gets reallocated for another
 		 * use in a different page. */
-		if (buffer_freed(bh)) {
+		if (buffer_freed(bh) && !jh->b_next_transaction) {
 			clear_buffer_freed(bh);
 			clear_buffer_jbddirty(bh);
 		}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ac0d027..c03d4dc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -39,6 +39,8 @@
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/vmalloc.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
@@ -93,6 +95,7 @@
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static int jbd2_journal_create_slab(size_t slab_size);
 
 /*
  * Helper function used to manage commit timeouts
@@ -1248,6 +1251,13 @@
 		}
 	}
 
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
+	if (err)
+		return err;
+
 	/* Let the recovery code check whether it needs to recover any
 	 * data from the journal. */
 	if (jbd2_journal_recover(journal))
@@ -1807,6 +1817,127 @@
 }
 
 /*
+ * JBD memory management
+ *
+ * These functions are used to allocate block-sized chunks of memory
+ * used for making copies of buffer_head data.  Very often it will be
+ * page-sized chunks of data, but sometimes it will be in
+ * sub-page-size chunks.  (For example, 16k pages on Power systems
+ * with a 4k block file system.)  For blocks smaller than a page, we
+ * use a SLAB allocator.  There are slab caches for each block size,
+ * which are allocated at mount time, if necessary, and we only free
+ * (all of) the slab caches when/if the jbd2 module is unloaded.  For
+ * this reason we don't need to a mutex to protect access to
+ * jbd2_slab[] allocating or releasing memory; only in
+ * jbd2_journal_create_slab().
+ */
+#define JBD2_MAX_SLABS 8
+static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
+static DECLARE_MUTEX(jbd2_slab_create_sem);
+
+static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
+	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
+	"jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
+};
+
+
+static void jbd2_journal_destroy_slabs(void)
+{
+	int i;
+
+	for (i = 0; i < JBD2_MAX_SLABS; i++) {
+		if (jbd2_slab[i])
+			kmem_cache_destroy(jbd2_slab[i]);
+		jbd2_slab[i] = NULL;
+	}
+}
+
+static int jbd2_journal_create_slab(size_t size)
+{
+	int i = order_base_2(size) - 10;
+	size_t slab_size;
+
+	if (size == PAGE_SIZE)
+		return 0;
+
+	if (i >= JBD2_MAX_SLABS)
+		return -EINVAL;
+
+	if (unlikely(i < 0))
+		i = 0;
+	down(&jbd2_slab_create_sem);
+	if (jbd2_slab[i]) {
+		up(&jbd2_slab_create_sem);
+		return 0;	/* Already created */
+	}
+
+	slab_size = 1 << (i+10);
+	jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
+					 slab_size, 0, NULL);
+	up(&jbd2_slab_create_sem);
+	if (!jbd2_slab[i]) {
+		printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static struct kmem_cache *get_slab(size_t size)
+{
+	int i = order_base_2(size) - 10;
+
+	BUG_ON(i >= JBD2_MAX_SLABS);
+	if (unlikely(i < 0))
+		i = 0;
+	BUG_ON(jbd2_slab[i] == 0);
+	return jbd2_slab[i];
+}
+
+void *jbd2_alloc(size_t size, gfp_t flags)
+{
+	void *ptr;
+
+	BUG_ON(size & (size-1)); /* Must be a power of 2 */
+
+	flags |= __GFP_REPEAT;
+	if (size == PAGE_SIZE)
+		ptr = (void *)__get_free_pages(flags, 0);
+	else if (size > PAGE_SIZE) {
+		int order = get_order(size);
+
+		if (order < 3)
+			ptr = (void *)__get_free_pages(flags, order);
+		else
+			ptr = vmalloc(size);
+	} else
+		ptr = kmem_cache_alloc(get_slab(size), flags);
+
+	/* Check alignment; SLUB has gotten this wrong in the past,
+	 * and this can lead to user data corruption! */
+	BUG_ON(((unsigned long) ptr) & (size-1));
+
+	return ptr;
+}
+
+void jbd2_free(void *ptr, size_t size)
+{
+	if (size == PAGE_SIZE) {
+		free_pages((unsigned long)ptr, 0);
+		return;
+	}
+	if (size > PAGE_SIZE) {
+		int order = get_order(size);
+
+		if (order < 3)
+			free_pages((unsigned long)ptr, order);
+		else
+			vfree(ptr);
+		return;
+	}
+	kmem_cache_free(get_slab(size), ptr);
+};
+
+/*
  * Journal_head storage management
  */
 static struct kmem_cache *jbd2_journal_head_cache;
@@ -2204,6 +2335,7 @@
 	jbd2_journal_destroy_revoke_caches();
 	jbd2_journal_destroy_jbd2_journal_head_cache();
 	jbd2_journal_destroy_handle_cache();
+	jbd2_journal_destroy_slabs();
 }
 
 static int __init journal_init(void)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a051270..bfc70f5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1727,6 +1727,21 @@
 	if (!jh)
 		goto zap_buffer_no_jh;
 
+	/*
+	 * We cannot remove the buffer from checkpoint lists until the
+	 * transaction adding inode to orphan list (let's call it T)
+	 * is committed.  Otherwise if the transaction changing the
+	 * buffer would be cleaned from the journal before T is
+	 * committed, a crash will cause that the correct contents of
+	 * the buffer will be lost.  On the other hand we have to
+	 * clear the buffer dirty bit at latest at the moment when the
+	 * transaction marking the buffer as freed in the filesystem
+	 * structures is committed because from that moment on the
+	 * buffer can be reallocated and used by a different page.
+	 * Since the block hasn't been freed yet but the inode has
+	 * already been added to orphan list, it is safe for us to add
+	 * the buffer to BJ_Forget list of the newest transaction.
+	 */
 	transaction = jh->b_transaction;
 	if (transaction == NULL) {
 		/* First case: not on any transaction.  If it
@@ -1783,16 +1798,15 @@
 	} else if (transaction == journal->j_committing_transaction) {
 		JBUFFER_TRACE(jh, "on committing transaction");
 		/*
-		 * If it is committing, we simply cannot touch it.  We
-		 * can remove it's next_transaction pointer from the
-		 * running transaction if that is set, but nothing
-		 * else. */
+		 * The buffer is committing, we simply cannot touch
+		 * it. So we just set j_next_transaction to the
+		 * running transaction (if there is one) and mark
+		 * buffer as freed so that commit code knows it should
+		 * clear dirty bits when it is done with the buffer.
+		 */
 		set_buffer_freed(bh);
-		if (jh->b_next_transaction) {
-			J_ASSERT(jh->b_next_transaction ==
-					journal->j_running_transaction);
-			jh->b_next_transaction = NULL;
-		}
+		if (journal->j_running_transaction && buffer_jbddirty(bh))
+			jh->b_next_transaction = journal->j_running_transaction;
 		jbd2_journal_put_journal_head(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
@@ -1969,7 +1983,7 @@
  */
 void __jbd2_journal_refile_buffer(struct journal_head *jh)
 {
-	int was_dirty;
+	int was_dirty, jlist;
 	struct buffer_head *bh = jh2bh(jh);
 
 	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -1991,8 +2005,13 @@
 	__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
-	__jbd2_journal_file_buffer(jh, jh->b_transaction,
-				jh->b_modified ? BJ_Metadata : BJ_Reserved);
+	if (buffer_freed(bh))
+		jlist = BJ_Forget;
+	else if (jh->b_modified)
+		jlist = BJ_Metadata;
+	else
+		jlist = BJ_Reserved;
+	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
 	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
 
 	if (was_dirty)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index d66477c..2131697 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -20,7 +20,6 @@
 
 #include <linux/sched.h>
 #include <linux/fs.h>
-#include <linux/quotaops.h>
 #include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
 #include "jfs_txnmgr.h"
@@ -174,7 +173,7 @@
 	return rc;
 }
 
-static int jfs_acl_chmod(struct inode *inode)
+int jfs_acl_chmod(struct inode *inode)
 {
 	struct posix_acl *acl, *clone;
 	int rc;
@@ -205,26 +204,3 @@
 	posix_acl_release(clone);
 	return rc;
 }
-
-int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
-{
-	struct inode *inode = dentry->d_inode;
-	int rc;
-
-	rc = inode_change_ok(inode, iattr);
-	if (rc)
-		return rc;
-
-	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
-	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
-		if (vfs_dq_transfer(inode, iattr))
-			return -EDQUOT;
-	}
-
-	rc = inode_setattr(inode, iattr);
-
-	if (!rc && (iattr->ia_valid & ATTR_MODE))
-		rc = jfs_acl_chmod(inode);
-
-	return rc;
-}
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2b70fa7..14ba982 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_dmap.h"
@@ -47,7 +48,7 @@
 {
 	int rc;
 
-	if ((rc = generic_file_open(inode, file)))
+	if ((rc = dquot_file_open(inode, file)))
 		return rc;
 
 	/*
@@ -88,14 +89,40 @@
 	return 0;
 }
 
+int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int rc;
+
+	rc = inode_change_ok(inode, iattr);
+	if (rc)
+		return rc;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		dquot_initialize(inode);
+	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
+	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
+		rc = dquot_transfer(inode, iattr);
+		if (rc)
+			return rc;
+	}
+
+	rc = inode_setattr(inode, iattr);
+
+	if (!rc && (iattr->ia_valid & ATTR_MODE))
+		rc = jfs_acl_chmod(inode);
+
+	return rc;
+}
+
 const struct inode_operations jfs_file_inode_operations = {
 	.truncate	= jfs_truncate,
 	.setxattr	= jfs_setxattr,
 	.getxattr	= jfs_getxattr,
 	.listxattr	= jfs_listxattr,
 	.removexattr	= jfs_removexattr,
-#ifdef CONFIG_JFS_POSIX_ACL
 	.setattr	= jfs_setattr,
+#ifdef CONFIG_JFS_POSIX_ACL
 	.check_acl	= jfs_check_acl,
 #endif
 };
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b2ae190..9dd1262 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/writeback.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
@@ -120,8 +121,10 @@
 	return rc;
 }
 
-int jfs_write_inode(struct inode *inode, int wait)
+int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
+	int wait = wbc->sync_mode == WB_SYNC_ALL;
+
 	if (test_cflag(COMMIT_Nolink, inode))
 		return 0;
 	/*
@@ -146,6 +149,9 @@
 {
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	if (!is_bad_inode(inode) &&
 	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
 		truncate_inode_pages(&inode->i_data, 0);
@@ -158,9 +164,9 @@
 		/*
 		 * Free the inode from the quota allocation.
 		 */
-		vfs_dq_init(inode);
-		vfs_dq_free_inode(inode);
-		vfs_dq_drop(inode);
+		dquot_initialize(inode);
+		dquot_free_inode(inode);
+		dquot_drop(inode);
 	}
 
 	clear_inode(inode);
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index b07bd41..54e0755 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -22,7 +22,7 @@
 
 int jfs_check_acl(struct inode *, int);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
-int jfs_setattr(struct dentry *, struct iattr *);
+int jfs_acl_chmod(struct inode *inode);
 
 #else
 
@@ -32,5 +32,10 @@
 	return 0;
 }
 
+static inline int jfs_acl_chmod(struct inode *inode)
+{
+	return 0;
+}
+
 #endif
 #endif		/* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 925871e..0e4623b 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -381,10 +381,10 @@
 		 * It's time to move the inline table to an external
 		 * page and begin to build the xtree
 		 */
-		if (vfs_dq_alloc_block(ip, sbi->nbperpage))
+		if (dquot_alloc_block(ip, sbi->nbperpage))
 			goto clean_up;
 		if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
-			vfs_dq_free_block(ip, sbi->nbperpage);
+			dquot_free_block(ip, sbi->nbperpage);
 			goto clean_up;
 		}
 
@@ -408,7 +408,7 @@
 			memcpy(&jfs_ip->i_dirtable, temp_table,
 			       sizeof (temp_table));
 			dbFree(ip, xaddr, sbi->nbperpage);
-			vfs_dq_free_block(ip, sbi->nbperpage);
+			dquot_free_block(ip, sbi->nbperpage);
 			goto clean_up;
 		}
 		ip->i_size = PSIZE;
@@ -1027,10 +1027,9 @@
 			n = xlen;
 
 		/* Allocate blocks to quota. */
-		if (vfs_dq_alloc_block(ip, n)) {
-			rc = -EDQUOT;
+		rc = dquot_alloc_block(ip, n);
+		if (rc)
 			goto extendOut;
-		}
 		quota_allocation += n;
 
 		if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
@@ -1308,7 +1307,7 @@
 
 	/* Rollback quota allocation */
 	if (rc && quota_allocation)
-		vfs_dq_free_block(ip, quota_allocation);
+		dquot_free_block(ip, quota_allocation);
 
       dtSplitUp_Exit:
 
@@ -1369,9 +1368,10 @@
 		return -EIO;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
@@ -1892,6 +1892,7 @@
 	struct dt_lock *dtlck;
 	struct tlock *tlck;
 	struct lv *lv;
+	int rc;
 
 	/* get split root page */
 	smp = split->mp;
@@ -1916,9 +1917,10 @@
 	rp = rmp->data;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	BT_MARK_DIRTY(rmp, ip);
@@ -2287,7 +2289,7 @@
 	xlen = lengthPXD(&fp->header.self);
 
 	/* Free quota allocation. */
-	vfs_dq_free_block(ip, xlen);
+	dquot_free_block(ip, xlen);
 
 	/* free/invalidate its buffer page */
 	discard_metapage(fmp);
@@ -2363,7 +2365,7 @@
 				xlen = lengthPXD(&p->header.self);
 
 				/* Free quota allocation */
-				vfs_dq_free_block(ip, xlen);
+				dquot_free_block(ip, xlen);
 
 				/* free/invalidate its buffer page */
 				discard_metapage(mp);
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 41d6045..5d3bbd1 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -141,10 +141,11 @@
 	}
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nxlen)) {
+	rc = dquot_alloc_block(ip, nxlen);
+	if (rc) {
 		dbFree(ip, nxaddr, (s64) nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
-		return -EDQUOT;
+		return rc;
 	}
 
 	/* determine the value of the extent flag */
@@ -164,7 +165,7 @@
 	 */
 	if (rc) {
 		dbFree(ip, nxaddr, nxlen);
-		vfs_dq_free_block(ip, nxlen);
+		dquot_free_block(ip, nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
 		return (rc);
 	}
@@ -256,10 +257,11 @@
 		goto exit;
 
 	/* Allocat blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nxlen)) {
+	rc = dquot_alloc_block(ip, nxlen);
+	if (rc) {
 		dbFree(ip, nxaddr, (s64) nxlen);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
-		return -EDQUOT;
+		return rc;
 	}
 
 	delta = nxlen - xlen;
@@ -297,7 +299,7 @@
 		/* extend the extent */
 		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
 			dbFree(ip, xaddr + xlen, delta);
-			vfs_dq_free_block(ip, nxlen);
+			dquot_free_block(ip, nxlen);
 			goto exit;
 		}
 	} else {
@@ -308,7 +310,7 @@
 		 */
 		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
 			dbFree(ip, nxaddr, nxlen);
-			vfs_dq_free_block(ip, nxlen);
+			dquot_free_block(ip, nxlen);
 			goto exit;
 		}
 	}
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index dc0e021..829921b 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -116,10 +116,10 @@
 	/*
 	 * Allocate inode to quota.
 	 */
-	if (vfs_dq_alloc_inode(inode)) {
-		rc = -EDQUOT;
+	dquot_initialize(inode);
+	rc = dquot_alloc_inode(inode);
+	if (rc)
 		goto fail_drop;
-	}
 
 	inode->i_mode = mode;
 	/* inherit flags from parent */
@@ -162,7 +162,7 @@
 	return inode;
 
 fail_drop:
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 fail_unlock:
 	inode->i_nlink = 0;
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 1eff7db..79e2c79 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -26,7 +26,7 @@
 extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
 extern struct inode *jfs_iget(struct super_block *, unsigned long);
 extern int jfs_commit_inode(struct inode *, int);
-extern int jfs_write_inode(struct inode*, int);
+extern int jfs_write_inode(struct inode *, struct writeback_control *);
 extern void jfs_delete_inode(struct inode *);
 extern void jfs_dirty_inode(struct inode *);
 extern void jfs_truncate(struct inode *);
@@ -40,6 +40,7 @@
 	int fh_len, int fh_type);
 extern void jfs_set_inode_flags(struct inode *);
 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern int jfs_setattr(struct dentry *, struct iattr *);
 
 extern const struct address_space_operations jfs_aops;
 extern const struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index d654a64..6c50871 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -585,10 +585,10 @@
 			hint = addressXAD(xad) + lengthXAD(xad) - 1;
 		} else
 			hint = 0;
-		if ((rc = vfs_dq_alloc_block(ip, xlen)))
+		if ((rc = dquot_alloc_block(ip, xlen)))
 			goto out;
 		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
-			vfs_dq_free_block(ip, xlen);
+			dquot_free_block(ip, xlen);
 			goto out;
 		}
 	}
@@ -617,7 +617,7 @@
 			/* undo data extent allocation */
 			if (*xaddrp == 0) {
 				dbFree(ip, xaddr, (s64) xlen);
-				vfs_dq_free_block(ip, xlen);
+				dquot_free_block(ip, xlen);
 			}
 			return rc;
 		}
@@ -985,10 +985,9 @@
 	rbn = addressPXD(pxd);
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
-		rc = -EDQUOT;
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc)
 		goto clean_up;
-	}
 
 	quota_allocation += lengthPXD(pxd);
 
@@ -1195,7 +1194,7 @@
 
 	/* Rollback quota allocation. */
 	if (quota_allocation)
-		vfs_dq_free_block(ip, quota_allocation);
+		dquot_free_block(ip, quota_allocation);
 
 	return (rc);
 }
@@ -1235,6 +1234,7 @@
 	struct pxdlist *pxdlist;
 	struct tlock *tlck;
 	struct xtlock *xtlck;
+	int rc;
 
 	sp = &JFS_IP(ip)->i_xtroot;
 
@@ -1252,9 +1252,10 @@
 		return -EIO;
 
 	/* Allocate blocks to quota. */
-	if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) {
+	rc = dquot_alloc_block(ip, lengthPXD(pxd));
+	if (rc) {
 		release_metapage(rmp);
-		return -EDQUOT;
+		return rc;
 	}
 
 	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
@@ -3680,7 +3681,7 @@
 		ip->i_size = newsize;
 
 	/* update quota allocation to reflect freed blocks */
-	vfs_dq_free_block(ip, nfreed);
+	dquot_free_block(ip, nfreed);
 
 	/*
 	 * free tlock of invalidated pages
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index c79a4270..4a3e9f3 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -85,6 +85,8 @@
 
 	jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name);
 
+	dquot_initialize(dip);
+
 	/*
 	 * search parent directory for entry/freespace
 	 * (dtSearch() returns parent directory page pinned)
@@ -215,6 +217,8 @@
 
 	jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
+	dquot_initialize(dip);
+
 	/* link count overflow on parent directory ? */
 	if (dip->i_nlink == JFS_LINK_MAX) {
 		rc = -EMLINK;
@@ -356,7 +360,8 @@
 	jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
-	vfs_dq_init(ip);
+	dquot_initialize(dip);
+	dquot_initialize(ip);
 
 	/* directory must be empty to be removed */
 	if (!dtEmpty(ip)) {
@@ -483,7 +488,8 @@
 	jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name);
 
 	/* Init inode for quota operations. */
-	vfs_dq_init(ip);
+	dquot_initialize(dip);
+	dquot_initialize(ip);
 
 	if ((rc = get_UCSname(&dname, dentry)))
 		goto out;
@@ -805,6 +811,8 @@
 	if (ip->i_nlink == 0)
 		return -ENOENT;
 
+	dquot_initialize(dir);
+
 	tid = txBegin(ip->i_sb, 0);
 
 	mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
@@ -896,6 +904,8 @@
 
 	jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name);
 
+	dquot_initialize(dip);
+
 	ssize = strlen(name) + 1;
 
 	/*
@@ -1087,6 +1097,9 @@
 	jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
 		 new_dentry->d_name.name);
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_ip = old_dentry->d_inode;
 	new_ip = new_dentry->d_inode;
 
@@ -1136,7 +1149,7 @@
 	} else if (new_ip) {
 		IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
 		/* Init inode for quota operations. */
-		vfs_dq_init(new_ip);
+		dquot_initialize(new_ip);
 	}
 
 	/*
@@ -1360,6 +1373,8 @@
 
 	jfs_info("jfs_mknod: %s", dentry->d_name.name);
 
+	dquot_initialize(dir);
+
 	if ((rc = get_UCSname(&dname, dentry)))
 		goto out;
 
@@ -1541,8 +1556,8 @@
 	.getxattr	= jfs_getxattr,
 	.listxattr	= jfs_listxattr,
 	.removexattr	= jfs_removexattr,
-#ifdef CONFIG_JFS_POSIX_ACL
 	.setattr	= jfs_setattr,
+#ifdef CONFIG_JFS_POSIX_ACL
 	.check_acl	= jfs_check_acl,
 #endif
 };
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d929a82..266699d 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -131,6 +131,11 @@
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
+static void jfs_clear_inode(struct inode *inode)
+{
+	dquot_drop(inode);
+}
+
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -745,6 +750,7 @@
 	.dirty_inode	= jfs_dirty_inode,
 	.write_inode	= jfs_write_inode,
 	.delete_inode	= jfs_delete_inode,
+	.clear_inode	= jfs_clear_inode,
 	.put_super	= jfs_put_super,
 	.sync_fs	= jfs_sync_fs,
 	.freeze_fs	= jfs_freeze,
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fad3645..1f594ab 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -260,14 +260,14 @@
 	nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits;
 
 	/* Allocate new blocks to quota. */
-	if (vfs_dq_alloc_block(ip, nblocks)) {
-		return -EDQUOT;
-	}
+	rc = dquot_alloc_block(ip, nblocks);
+	if (rc)
+		return rc;
 
 	rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno);
 	if (rc) {
 		/*Rollback quota allocation. */
-		vfs_dq_free_block(ip, nblocks);
+		dquot_free_block(ip, nblocks);
 		return rc;
 	}
 
@@ -332,7 +332,7 @@
 
       failed:
 	/* Rollback quota allocation. */
-	vfs_dq_free_block(ip, nblocks);
+	dquot_free_block(ip, nblocks);
 
 	dbFree(ip, blkno, nblocks);
 	return rc;
@@ -538,7 +538,8 @@
 
 	if (blocks_needed > current_blocks) {
 		/* Allocate new blocks to quota. */
-		if (vfs_dq_alloc_block(inode, blocks_needed))
+		rc = dquot_alloc_block(inode, blocks_needed);
+		if (rc)
 			return -EDQUOT;
 
 		quota_allocation = blocks_needed;
@@ -602,7 +603,7 @@
       clean_up:
 	/* Rollback quota allocation */
 	if (quota_allocation)
-		vfs_dq_free_block(inode, quota_allocation);
+		dquot_free_block(inode, quota_allocation);
 
 	return (rc);
 }
@@ -677,7 +678,7 @@
 
 	/* If old blocks exist, they must be removed from quota allocation. */
 	if (old_blocks)
-		vfs_dq_free_block(inode, old_blocks);
+		dquot_free_block(inode, old_blocks);
 
 	inode->i_ctime = CURRENT_TIME;
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 6e8d17e..9e50bcf 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -338,28 +338,14 @@
 	return 0;
 }
 
-int simple_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to)
-{
-	if (!PageUptodate(page)) {
-		if (to - from != PAGE_CACHE_SIZE)
-			zero_user_segments(page,
-				0, from,
-				to, PAGE_CACHE_SIZE);
-	}
-	return 0;
-}
-
 int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
 	struct page *page;
 	pgoff_t index;
-	unsigned from;
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	from = pos & (PAGE_CACHE_SIZE - 1);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -367,14 +353,48 @@
 
 	*pagep = page;
 
-	return simple_prepare_write(file, page, from, from+len);
+	if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+		zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
+	}
+	return 0;
 }
 
-static int simple_commit_write(struct file *file, struct page *page,
-			       unsigned from, unsigned to)
+/**
+ * simple_write_end - .write_end helper for non-block-device FSes
+ * @available: See .write_end of address_space_operations
+ * @file: 		"
+ * @mapping: 		"
+ * @pos: 		"
+ * @len: 		"
+ * @copied: 		"
+ * @page: 		"
+ * @fsdata: 		"
+ *
+ * simple_write_end does the minimum needed for updating a page after writing is
+ * done. It has the same API signature as the .write_end of
+ * address_space_operations vector. So it can just be set onto .write_end for
+ * FSes that don't need any other processing. i_mutex is assumed to be held.
+ * Block based filesystems should use generic_write_end().
+ * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
+ * is not called, so a filesystem that actually does store data in .write_inode
+ * should extend on what's done here with a call to mark_inode_dirty() in the
+ * case that i_size has changed.
+ */
+int simple_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	loff_t last_pos = pos + copied;
+
+	/* zero the stale part of the page if we did a short copy */
+	if (copied < len) {
+		unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+		zero_user(page, from + copied, len - copied);
+	}
 
 	if (!PageUptodate(page))
 		SetPageUptodate(page);
@@ -382,28 +402,10 @@
 	 * No need to use i_size_read() here, the i_size
 	 * cannot change under us because we hold the i_mutex.
 	 */
-	if (pos > inode->i_size)
-		i_size_write(inode, pos);
+	if (last_pos > inode->i_size)
+		i_size_write(inode, last_pos);
+
 	set_page_dirty(page);
-	return 0;
-}
-
-int simple_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
-{
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-
-	/* zero the stale part of the page if we did a short copy */
-	if (copied < len) {
-		void *kaddr = kmap_atomic(page, KM_USER0);
-		memset(kaddr + from + copied, 0, len - copied);
-		flush_dcache_page(page);
-		kunmap_atomic(kaddr, KM_USER0);
-	}
-
-	simple_commit_write(file, page, from, from+copied);
-
 	unlock_page(page);
 	page_cache_release(page);
 
@@ -853,7 +855,6 @@
 EXPORT_SYMBOL(simple_link);
 EXPORT_SYMBOL(simple_lookup);
 EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_UNUSED_SYMBOL(simple_prepare_write);
 EXPORT_SYMBOL(simple_readpage);
 EXPORT_SYMBOL(simple_release_fs);
 EXPORT_SYMBOL(simple_rename);
diff --git a/fs/locks.c b/fs/locks.c
index a8794f2..ae9ded0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1182,8 +1182,9 @@
 	struct file_lock *fl;
 	unsigned long break_time;
 	int i_have_this_lease = 0;
+	int want_write = (mode & O_ACCMODE) != O_RDONLY;
 
-	new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK);
+	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
 
 	lock_kernel();
 
@@ -1197,7 +1198,7 @@
 		if (fl->fl_owner == current->files)
 			i_have_this_lease = 1;
 
-	if (mode & FMODE_WRITE) {
+	if (want_write) {
 		/* If we want write access, we have to revoke any lease. */
 		future = F_UNLCK | F_INPROGRESS;
 	} else if (flock->fl_type & F_INPROGRESS) {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 74ea82d..756f8c9 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -17,8 +17,10 @@
 #include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 
-static int minix_write_inode(struct inode * inode, int wait);
+static int minix_write_inode(struct inode *inode,
+		struct writeback_control *wbc);
 static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
@@ -552,7 +554,7 @@
 	return bh;
 }
 
-static int minix_write_inode(struct inode *inode, int wait)
+static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err = 0;
 	struct buffer_head *bh;
@@ -563,7 +565,7 @@
 		bh = V2_minix_update_inode(inode);
 	if (!bh)
 		return -EIO;
-	if (wait && buffer_dirty(bh)) {
+	if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) {
 		sync_dirty_buffer(bh);
 		if (buffer_req(bh) && !buffer_uptodate(bh)) {
 			printk("IO error syncing minix inode [%s:%08lx]\n",
diff --git a/fs/namei.c b/fs/namei.c
index a4855af..3d9d2f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
-#include <linux/quotaops.h>
 #include <linux/pagemap.h>
 #include <linux/fsnotify.h>
 #include <linux/personality.h>
@@ -498,8 +497,6 @@
 
 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
 {
-	int res = 0;
-	char *name;
 	if (IS_ERR(link))
 		goto fail;
 
@@ -510,22 +507,7 @@
 		path_get(&nd->root);
 	}
 
-	res = link_path_walk(link, nd);
-	if (nd->depth || res || nd->last_type!=LAST_NORM)
-		return res;
-	/*
-	 * If it is an iterative symlinks resolution in open_namei() we
-	 * have to copy the last component. And all that crap because of
-	 * bloody create() on broken symlinks. Furrfu...
-	 */
-	name = __getname();
-	if (unlikely(!name)) {
-		path_put(&nd->path);
-		return -ENOMEM;
-	}
-	strcpy(name, nd->last.name);
-	nd->last.name = name;
-	return 0;
+	return link_path_walk(link, nd);
 fail:
 	path_put(&nd->path);
 	return PTR_ERR(link);
@@ -547,10 +529,10 @@
 	nd->path.dentry = path->dentry;
 }
 
-static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd)
+static __always_inline int
+__do_follow_link(struct path *path, struct nameidata *nd, void **p)
 {
 	int error;
-	void *cookie;
 	struct dentry *dentry = path->dentry;
 
 	touch_atime(path->mnt, dentry);
@@ -562,9 +544,9 @@
 	}
 	mntget(path->mnt);
 	nd->last_type = LAST_BIND;
-	cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
-	error = PTR_ERR(cookie);
-	if (!IS_ERR(cookie)) {
+	*p = dentry->d_inode->i_op->follow_link(dentry, nd);
+	error = PTR_ERR(*p);
+	if (!IS_ERR(*p)) {
 		char *s = nd_get_link(nd);
 		error = 0;
 		if (s)
@@ -574,8 +556,6 @@
 			if (error)
 				path_put(&nd->path);
 		}
-		if (dentry->d_inode->i_op->put_link)
-			dentry->d_inode->i_op->put_link(dentry, nd, cookie);
 	}
 	return error;
 }
@@ -589,6 +569,7 @@
  */
 static inline int do_follow_link(struct path *path, struct nameidata *nd)
 {
+	void *cookie;
 	int err = -ELOOP;
 	if (current->link_count >= MAX_NESTED_LINKS)
 		goto loop;
@@ -602,7 +583,9 @@
 	current->link_count++;
 	current->total_link_count++;
 	nd->depth++;
-	err = __do_follow_link(path, nd);
+	err = __do_follow_link(path, nd, &cookie);
+	if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
+		path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
 	path_put(path);
 	current->link_count--;
 	nd->depth--;
@@ -689,33 +672,20 @@
 	set_root(nd);
 
 	while(1) {
-		struct vfsmount *parent;
 		struct dentry *old = nd->path.dentry;
 
 		if (nd->path.dentry == nd->root.dentry &&
 		    nd->path.mnt == nd->root.mnt) {
 			break;
 		}
-		spin_lock(&dcache_lock);
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
-			nd->path.dentry = dget(nd->path.dentry->d_parent);
-			spin_unlock(&dcache_lock);
+			/* rare case of legitimate dget_parent()... */
+			nd->path.dentry = dget_parent(nd->path.dentry);
 			dput(old);
 			break;
 		}
-		spin_unlock(&dcache_lock);
-		spin_lock(&vfsmount_lock);
-		parent = nd->path.mnt->mnt_parent;
-		if (parent == nd->path.mnt) {
-			spin_unlock(&vfsmount_lock);
+		if (!follow_up(&nd->path))
 			break;
-		}
-		mntget(parent);
-		nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
-		spin_unlock(&vfsmount_lock);
-		dput(old);
-		mntput(nd->path.mnt);
-		nd->path.mnt = parent;
 	}
 	follow_mount(&nd->path);
 }
@@ -1347,7 +1317,7 @@
 		return -ENOENT;
 
 	BUG_ON(victim->d_parent->d_inode != dir);
-	audit_inode_child(victim->d_name.name, victim, dir);
+	audit_inode_child(victim, dir);
 
 	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
 	if (error)
@@ -1388,22 +1358,6 @@
 	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 }
 
-/* 
- * O_DIRECTORY translates into forcing a directory lookup.
- */
-static inline int lookup_flags(unsigned int f)
-{
-	unsigned long retval = LOOKUP_FOLLOW;
-
-	if (f & O_NOFOLLOW)
-		retval &= ~LOOKUP_FOLLOW;
-	
-	if (f & O_DIRECTORY)
-		retval |= LOOKUP_DIRECTORY;
-
-	return retval;
-}
-
 /*
  * p1 and p2 should be directories on the same fs.
  */
@@ -1461,7 +1415,6 @@
 	error = security_inode_create(dir, dentry, mode);
 	if (error)
 		return error;
-	vfs_dq_init(dir);
 	error = dir->i_op->create(dir, dentry, mode, nd);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -1503,7 +1456,7 @@
 	 * An append-only file must be opened in append mode for writing.
 	 */
 	if (IS_APPEND(inode)) {
-		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
+		if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
 			return -EPERM;
 		if (flag & O_TRUNC)
 			return -EPERM;
@@ -1547,7 +1500,7 @@
  * what get passed to sys_open().
  */
 static int __open_namei_create(struct nameidata *nd, struct path *path,
-				int flag, int mode)
+				int open_flag, int mode)
 {
 	int error;
 	struct dentry *dir = nd->path.dentry;
@@ -1565,7 +1518,7 @@
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
-	return may_open(&nd->path, 0, flag & ~O_TRUNC);
+	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
 /*
@@ -1603,129 +1556,132 @@
 	return (flag & O_TRUNC);
 }
 
-/*
- * Note that the low bits of the passed in "open_flag"
- * are not the same as in the local variable "flag". See
- * open_to_namei_flags() for more details.
- */
-struct file *do_filp_open(int dfd, const char *pathname,
-		int open_flag, int mode, int acc_mode)
+static struct file *finish_open(struct nameidata *nd,
+				int open_flag, int acc_mode)
 {
 	struct file *filp;
-	struct nameidata nd;
-	int error;
-	struct path path;
-	struct dentry *dir;
-	int count = 0;
 	int will_truncate;
-	int flag = open_to_namei_flags(open_flag);
-	int force_reval = 0;
+	int error;
 
-	/*
-	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
-	 * check for O_DSYNC if the need any syncing at all we enforce it's
-	 * always set instead of having to deal with possibly weird behaviour
-	 * for malicious applications setting only __O_SYNC.
-	 */
-	if (open_flag & __O_SYNC)
-		open_flag |= O_DSYNC;
-
-	if (!acc_mode)
-		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
-
-	/* O_TRUNC implies we need access checks for write permissions */
-	if (flag & O_TRUNC)
-		acc_mode |= MAY_WRITE;
-
-	/* Allow the LSM permission hook to distinguish append 
-	   access from general write access. */
-	if (flag & O_APPEND)
-		acc_mode |= MAY_APPEND;
-
-	/*
-	 * The simplest case - just a plain lookup.
-	 */
-	if (!(flag & O_CREAT)) {
-		filp = get_empty_filp();
-
-		if (filp == NULL)
-			return ERR_PTR(-ENFILE);
-		nd.intent.open.file = filp;
-		filp->f_flags = open_flag;
-		nd.intent.open.flags = flag;
-		nd.intent.open.create_mode = 0;
-		error = do_path_lookup(dfd, pathname,
-					lookup_flags(flag)|LOOKUP_OPEN, &nd);
-		if (IS_ERR(nd.intent.open.file)) {
-			if (error == 0) {
-				error = PTR_ERR(nd.intent.open.file);
-				path_put(&nd.path);
-			}
-		} else if (error)
-			release_open_intent(&nd);
+	will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
+	if (will_truncate) {
+		error = mnt_want_write(nd->path.mnt);
 		if (error)
-			return ERR_PTR(error);
+			goto exit;
+	}
+	error = may_open(&nd->path, acc_mode, open_flag);
+	if (error) {
+		if (will_truncate)
+			mnt_drop_write(nd->path.mnt);
+		goto exit;
+	}
+	filp = nameidata_to_filp(nd);
+	if (!IS_ERR(filp)) {
+		error = ima_file_check(filp, acc_mode);
+		if (error) {
+			fput(filp);
+			filp = ERR_PTR(error);
+		}
+	}
+	if (!IS_ERR(filp)) {
+		if (will_truncate) {
+			error = handle_truncate(&nd->path);
+			if (error) {
+				fput(filp);
+				filp = ERR_PTR(error);
+			}
+		}
+	}
+	/*
+	 * It is now safe to drop the mnt write
+	 * because the filp has had a write taken
+	 * on its behalf.
+	 */
+	if (will_truncate)
+		mnt_drop_write(nd->path.mnt);
+	return filp;
+
+exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
+	path_put(&nd->path);
+	return ERR_PTR(error);
+}
+
+static struct file *do_last(struct nameidata *nd, struct path *path,
+			    int open_flag, int acc_mode,
+			    int mode, const char *pathname,
+			    int *want_dir)
+{
+	struct dentry *dir = nd->path.dentry;
+	struct file *filp;
+	int error = -EISDIR;
+
+	switch (nd->last_type) {
+	case LAST_DOTDOT:
+		follow_dotdot(nd);
+		dir = nd->path.dentry;
+		if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
+			if (!dir->d_op->d_revalidate(dir, nd)) {
+				error = -ESTALE;
+				goto exit;
+			}
+		}
+		/* fallthrough */
+	case LAST_DOT:
+	case LAST_ROOT:
+		if (open_flag & O_CREAT)
+			goto exit;
+		/* fallthrough */
+	case LAST_BIND:
+		audit_inode(pathname, dir);
 		goto ok;
 	}
 
-	/*
-	 * Create - we need to know the parent.
-	 */
-reval:
-	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
-	if (error)
-		return ERR_PTR(error);
-	if (force_reval)
-		nd.flags |= LOOKUP_REVAL;
-	error = path_walk(pathname, &nd);
-	if (error) {
-		if (nd.root.mnt)
-			path_put(&nd.root);
-		return ERR_PTR(error);
+	/* trailing slashes? */
+	if (nd->last.name[nd->last.len]) {
+		if (open_flag & O_CREAT)
+			goto exit;
+		*want_dir = 1;
 	}
-	if (unlikely(!audit_dummy_context()))
-		audit_inode(pathname, nd.path.dentry);
 
-	/*
-	 * We have the parent and last component. First of all, check
-	 * that we are not asked to creat(2) an obvious directory - that
-	 * will not do.
-	 */
-	error = -EISDIR;
-	if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
-		goto exit_parent;
+	/* just plain open? */
+	if (!(open_flag & O_CREAT)) {
+		error = do_lookup(nd, &nd->last, path);
+		if (error)
+			goto exit;
+		error = -ENOENT;
+		if (!path->dentry->d_inode)
+			goto exit_dput;
+		if (path->dentry->d_inode->i_op->follow_link)
+			return NULL;
+		error = -ENOTDIR;
+		if (*want_dir & !path->dentry->d_inode->i_op->lookup)
+			goto exit_dput;
+		path_to_nameidata(path, nd);
+		audit_inode(pathname, nd->path.dentry);
+		goto ok;
+	}
 
-	error = -ENFILE;
-	filp = get_empty_filp();
-	if (filp == NULL)
-		goto exit_parent;
-	nd.intent.open.file = filp;
-	filp->f_flags = open_flag;
-	nd.intent.open.flags = flag;
-	nd.intent.open.create_mode = mode;
-	dir = nd.path.dentry;
-	nd.flags &= ~LOOKUP_PARENT;
-	nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
-	if (flag & O_EXCL)
-		nd.flags |= LOOKUP_EXCL;
+	/* OK, it's O_CREAT */
 	mutex_lock(&dir->d_inode->i_mutex);
-	path.dentry = lookup_hash(&nd);
-	path.mnt = nd.path.mnt;
 
-do_last:
-	error = PTR_ERR(path.dentry);
-	if (IS_ERR(path.dentry)) {
+	path->dentry = lookup_hash(nd);
+	path->mnt = nd->path.mnt;
+
+	error = PTR_ERR(path->dentry);
+	if (IS_ERR(path->dentry)) {
 		mutex_unlock(&dir->d_inode->i_mutex);
 		goto exit;
 	}
 
-	if (IS_ERR(nd.intent.open.file)) {
-		error = PTR_ERR(nd.intent.open.file);
+	if (IS_ERR(nd->intent.open.file)) {
+		error = PTR_ERR(nd->intent.open.file);
 		goto exit_mutex_unlock;
 	}
 
 	/* Negative dentry, just create the file */
-	if (!path.dentry->d_inode) {
+	if (!path->dentry->d_inode) {
 		/*
 		 * This write is needed to ensure that a
 		 * ro->rw transition does not occur between
@@ -1733,18 +1689,16 @@
 		 * a permanent write count is taken through
 		 * the 'struct file' in nameidata_to_filp().
 		 */
-		error = mnt_want_write(nd.path.mnt);
+		error = mnt_want_write(nd->path.mnt);
 		if (error)
 			goto exit_mutex_unlock;
-		error = __open_namei_create(&nd, &path, flag, mode);
+		error = __open_namei_create(nd, path, open_flag, mode);
 		if (error) {
-			mnt_drop_write(nd.path.mnt);
+			mnt_drop_write(nd->path.mnt);
 			goto exit;
 		}
-		filp = nameidata_to_filp(&nd);
-		mnt_drop_write(nd.path.mnt);
-		if (nd.root.mnt)
-			path_put(&nd.root);
+		filp = nameidata_to_filp(nd);
+		mnt_drop_write(nd->path.mnt);
 		if (!IS_ERR(filp)) {
 			error = ima_file_check(filp, acc_mode);
 			if (error) {
@@ -1759,150 +1713,181 @@
 	 * It already exists.
 	 */
 	mutex_unlock(&dir->d_inode->i_mutex);
-	audit_inode(pathname, path.dentry);
+	audit_inode(pathname, path->dentry);
 
 	error = -EEXIST;
-	if (flag & O_EXCL)
+	if (open_flag & O_EXCL)
 		goto exit_dput;
 
-	if (__follow_mount(&path)) {
+	if (__follow_mount(path)) {
 		error = -ELOOP;
-		if (flag & O_NOFOLLOW)
+		if (open_flag & O_NOFOLLOW)
 			goto exit_dput;
 	}
 
 	error = -ENOENT;
-	if (!path.dentry->d_inode)
+	if (!path->dentry->d_inode)
 		goto exit_dput;
-	if (path.dentry->d_inode->i_op->follow_link)
-		goto do_link;
 
-	path_to_nameidata(&path, &nd);
+	if (path->dentry->d_inode->i_op->follow_link)
+		return NULL;
+
+	path_to_nameidata(path, nd);
 	error = -EISDIR;
-	if (S_ISDIR(path.dentry->d_inode->i_mode))
+	if (S_ISDIR(path->dentry->d_inode->i_mode))
 		goto exit;
 ok:
-	/*
-	 * Consider:
-	 * 1. may_open() truncates a file
-	 * 2. a rw->ro mount transition occurs
-	 * 3. nameidata_to_filp() fails due to
-	 *    the ro mount.
-	 * That would be inconsistent, and should
-	 * be avoided. Taking this mnt write here
-	 * ensures that (2) can not occur.
-	 */
-	will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
-	if (will_truncate) {
-		error = mnt_want_write(nd.path.mnt);
-		if (error)
-			goto exit;
-	}
-	error = may_open(&nd.path, acc_mode, flag);
-	if (error) {
-		if (will_truncate)
-			mnt_drop_write(nd.path.mnt);
-		goto exit;
-	}
-	filp = nameidata_to_filp(&nd);
-	if (!IS_ERR(filp)) {
-		error = ima_file_check(filp, acc_mode);
-		if (error) {
-			fput(filp);
-			filp = ERR_PTR(error);
-		}
-	}
-	if (!IS_ERR(filp)) {
-		if (acc_mode & MAY_WRITE)
-			vfs_dq_init(nd.path.dentry->d_inode);
-
-		if (will_truncate) {
-			error = handle_truncate(&nd.path);
-			if (error) {
-				fput(filp);
-				filp = ERR_PTR(error);
-			}
-		}
-	}
-	/*
-	 * It is now safe to drop the mnt write
-	 * because the filp has had a write taken
-	 * on its behalf.
-	 */
-	if (will_truncate)
-		mnt_drop_write(nd.path.mnt);
-	if (nd.root.mnt)
-		path_put(&nd.root);
+	filp = finish_open(nd, open_flag, acc_mode);
 	return filp;
 
 exit_mutex_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
-	path_put_conditional(&path, &nd);
+	path_put_conditional(path, nd);
 exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
+	path_put(&nd->path);
+	return ERR_PTR(error);
+}
+
+/*
+ * Note that the low bits of the passed in "open_flag"
+ * are not the same as in the local variable "flag". See
+ * open_to_namei_flags() for more details.
+ */
+struct file *do_filp_open(int dfd, const char *pathname,
+		int open_flag, int mode, int acc_mode)
+{
+	struct file *filp;
+	struct nameidata nd;
+	int error;
+	struct path path;
+	int count = 0;
+	int flag = open_to_namei_flags(open_flag);
+	int force_reval = 0;
+	int want_dir = open_flag & O_DIRECTORY;
+
+	if (!(open_flag & O_CREAT))
+		mode = 0;
+
+	/*
+	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
+	 * check for O_DSYNC if the need any syncing at all we enforce it's
+	 * always set instead of having to deal with possibly weird behaviour
+	 * for malicious applications setting only __O_SYNC.
+	 */
+	if (open_flag & __O_SYNC)
+		open_flag |= O_DSYNC;
+
+	if (!acc_mode)
+		acc_mode = MAY_OPEN | ACC_MODE(open_flag);
+
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (open_flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+
+	/* Allow the LSM permission hook to distinguish append 
+	   access from general write access. */
+	if (open_flag & O_APPEND)
+		acc_mode |= MAY_APPEND;
+
+	/* find the parent */
+reval:
+	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
+	if (error)
+		return ERR_PTR(error);
+	if (force_reval)
+		nd.flags |= LOOKUP_REVAL;
+
+	current->total_link_count = 0;
+	error = link_path_walk(pathname, &nd);
+	if (error) {
+		filp = ERR_PTR(error);
+		goto out;
+	}
+	if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
+		audit_inode(pathname, nd.path.dentry);
+
+	/*
+	 * We have the parent and last component.
+	 */
+
+	error = -ENFILE;
+	filp = get_empty_filp();
+	if (filp == NULL)
+		goto exit_parent;
+	nd.intent.open.file = filp;
+	filp->f_flags = open_flag;
+	nd.intent.open.flags = flag;
+	nd.intent.open.create_mode = mode;
+	nd.flags &= ~LOOKUP_PARENT;
+	nd.flags |= LOOKUP_OPEN;
+	if (open_flag & O_CREAT) {
+		nd.flags |= LOOKUP_CREATE;
+		if (open_flag & O_EXCL)
+			nd.flags |= LOOKUP_EXCL;
+	}
+	filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+	while (unlikely(!filp)) { /* trailing symlink */
+		struct path holder;
+		struct inode *inode = path.dentry->d_inode;
+		void *cookie;
+		error = -ELOOP;
+		/* S_ISDIR part is a temporary automount kludge */
+		if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
+			goto exit_dput;
+		if (count++ == 32)
+			goto exit_dput;
+		/*
+		 * This is subtle. Instead of calling do_follow_link() we do
+		 * the thing by hands. The reason is that this way we have zero
+		 * link_count and path_walk() (called from ->follow_link)
+		 * honoring LOOKUP_PARENT.  After that we have the parent and
+		 * last component, i.e. we are in the same situation as after
+		 * the first path_walk().  Well, almost - if the last component
+		 * is normal we get its copy stored in nd->last.name and we will
+		 * have to putname() it when we are done. Procfs-like symlinks
+		 * just set LAST_BIND.
+		 */
+		nd.flags |= LOOKUP_PARENT;
+		error = security_inode_follow_link(path.dentry, &nd);
+		if (error)
+			goto exit_dput;
+		error = __do_follow_link(&path, &nd, &cookie);
+		if (unlikely(error)) {
+			/* nd.path had been dropped */
+			if (!IS_ERR(cookie) && inode->i_op->put_link)
+				inode->i_op->put_link(path.dentry, &nd, cookie);
+			path_put(&path);
+			release_open_intent(&nd);
+			filp = ERR_PTR(error);
+			goto out;
+		}
+		holder = path;
+		nd.flags &= ~LOOKUP_PARENT;
+		filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
+		if (inode->i_op->put_link)
+			inode->i_op->put_link(holder.dentry, &nd, cookie);
+		path_put(&holder);
+	}
+out:
+	if (nd.root.mnt)
+		path_put(&nd.root);
+	if (filp == ERR_PTR(-ESTALE) && !force_reval) {
+		force_reval = 1;
+		goto reval;
+	}
+	return filp;
+
+exit_dput:
+	path_put_conditional(&path, &nd);
 	if (!IS_ERR(nd.intent.open.file))
 		release_open_intent(&nd);
 exit_parent:
-	if (nd.root.mnt)
-		path_put(&nd.root);
 	path_put(&nd.path);
-	return ERR_PTR(error);
-
-do_link:
-	error = -ELOOP;
-	if (flag & O_NOFOLLOW)
-		goto exit_dput;
-	/*
-	 * This is subtle. Instead of calling do_follow_link() we do the
-	 * thing by hands. The reason is that this way we have zero link_count
-	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
-	 * After that we have the parent and last component, i.e.
-	 * we are in the same situation as after the first path_walk().
-	 * Well, almost - if the last component is normal we get its copy
-	 * stored in nd->last.name and we will have to putname() it when we
-	 * are done. Procfs-like symlinks just set LAST_BIND.
-	 */
-	nd.flags |= LOOKUP_PARENT;
-	error = security_inode_follow_link(path.dentry, &nd);
-	if (error)
-		goto exit_dput;
-	error = __do_follow_link(&path, &nd);
-	path_put(&path);
-	if (error) {
-		/* Does someone understand code flow here? Or it is only
-		 * me so stupid? Anathema to whoever designed this non-sense
-		 * with "intent.open".
-		 */
-		release_open_intent(&nd);
-		if (nd.root.mnt)
-			path_put(&nd.root);
-		if (error == -ESTALE && !force_reval) {
-			force_reval = 1;
-			goto reval;
-		}
-		return ERR_PTR(error);
-	}
-	nd.flags &= ~LOOKUP_PARENT;
-	if (nd.last_type == LAST_BIND)
-		goto ok;
-	error = -EISDIR;
-	if (nd.last_type != LAST_NORM)
-		goto exit;
-	if (nd.last.name[nd.last.len]) {
-		__putname(nd.last.name);
-		goto exit;
-	}
-	error = -ELOOP;
-	if (count++==32) {
-		__putname(nd.last.name);
-		goto exit;
-	}
-	dir = nd.path.dentry;
-	mutex_lock(&dir->d_inode->i_mutex);
-	path.dentry = lookup_hash(&nd);
-	path.mnt = nd.path.mnt;
-	__putname(nd.last.name);
-	goto do_last;
+	filp = ERR_PTR(error);
+	goto out;
 }
 
 /**
@@ -1996,7 +1981,6 @@
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->mknod(dir, dentry, mode, dev);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -2095,7 +2079,6 @@
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->mkdir(dir, dentry, mode);
 	if (!error)
 		fsnotify_mkdir(dir, dentry);
@@ -2181,8 +2164,6 @@
 	if (!dir->i_op->rmdir)
 		return -EPERM;
 
-	vfs_dq_init(dir);
-
 	mutex_lock(&dentry->d_inode->i_mutex);
 	dentry_unhash(dentry);
 	if (d_mountpoint(dentry))
@@ -2268,15 +2249,16 @@
 	if (!dir->i_op->unlink)
 		return -EPERM;
 
-	vfs_dq_init(dir);
-
 	mutex_lock(&dentry->d_inode->i_mutex);
 	if (d_mountpoint(dentry))
 		error = -EBUSY;
 	else {
 		error = security_inode_unlink(dir, dentry);
-		if (!error)
+		if (!error) {
 			error = dir->i_op->unlink(dir, dentry);
+			if (!error)
+				dentry->d_inode->i_flags |= S_DEAD;
+		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
@@ -2379,7 +2361,6 @@
 	if (error)
 		return error;
 
-	vfs_dq_init(dir);
 	error = dir->i_op->symlink(dir, dentry, oldname);
 	if (!error)
 		fsnotify_create(dir, dentry);
@@ -2463,7 +2444,6 @@
 		return error;
 
 	mutex_lock(&inode->i_mutex);
-	vfs_dq_init(dir);
 	error = dir->i_op->link(old_dentry, dir, new_dentry);
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
@@ -2629,6 +2609,8 @@
 	else
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (!error) {
+		if (target)
+			target->i_flags |= S_DEAD;
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 			d_move(old_dentry, new_dentry);
 	}
@@ -2662,20 +2644,15 @@
 	if (!old_dir->i_op->rename)
 		return -EPERM;
 
-	vfs_dq_init(old_dir);
-	vfs_dq_init(new_dir);
-
 	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 
 	if (is_dir)
 		error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
 	else
 		error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-	if (!error) {
-		const char *new_name = old_dentry->d_name.name;
-		fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
+	if (!error)
+		fsnotify_move(old_dir, new_dir, old_name, is_dir,
 			      new_dentry->d_inode, old_dentry);
-	}
 	fsnotify_oldname_free(old_name);
 
 	return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index c768f73..8174c8a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -573,7 +573,7 @@
 			mnt->mnt_master = old;
 			CLEAR_MNT_SHARED(mnt);
 		} else if (!(flag & CL_PRIVATE)) {
-			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
+			if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
 				list_add(&mnt->mnt_share, &old->mnt_share);
 			if (IS_MNT_SLAVE(old))
 				list_add(&mnt->mnt_slave, &old->mnt_slave);
@@ -737,6 +737,21 @@
 	up_read(&namespace_sem);
 }
 
+int mnt_had_events(struct proc_mounts *p)
+{
+	struct mnt_namespace *ns = p->ns;
+	int res = 0;
+
+	spin_lock(&vfsmount_lock);
+	if (p->event != ns->event) {
+		p->event = ns->event;
+		res = 1;
+	}
+	spin_unlock(&vfsmount_lock);
+
+	return res;
+}
+
 struct proc_fs_info {
 	int flag;
 	const char *str;
@@ -1121,8 +1136,15 @@
 {
 	struct path path;
 	int retval;
+	int lookup_flags = 0;
 
-	retval = user_path(name, &path);
+	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
+		return -EINVAL;
+
+	if (!(flags & UMOUNT_NOFOLLOW))
+		lookup_flags |= LOOKUP_FOLLOW;
+
+	retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
 	if (retval)
 		goto out;
 	retval = -EINVAL;
@@ -1246,6 +1268,21 @@
 	release_mounts(&umount_list);
 }
 
+int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
+		   struct vfsmount *root)
+{
+	struct vfsmount *mnt;
+	int res = f(root, arg);
+	if (res)
+		return res;
+	list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
+		res = f(mnt, arg);
+		if (res)
+			return res;
+	}
+	return 0;
+}
+
 static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
 {
 	struct vfsmount *p;
@@ -1538,7 +1575,7 @@
 		err = do_remount_sb(sb, flags, data, 0);
 	if (!err) {
 		spin_lock(&vfsmount_lock);
-		mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
+		mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
 		path->mnt->mnt_flags = mnt_flags;
 		spin_unlock(&vfsmount_lock);
 	}
@@ -1671,7 +1708,7 @@
 {
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
 
 	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
@@ -2314,17 +2351,13 @@
 
 void put_mnt_ns(struct mnt_namespace *ns)
 {
-	struct vfsmount *root;
 	LIST_HEAD(umount_list);
 
-	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+	if (!atomic_dec_and_test(&ns->count))
 		return;
-	root = ns->root;
-	ns->root = NULL;
-	spin_unlock(&vfsmount_lock);
 	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
-	umount_tree(root, 0, &umount_list);
+	umount_tree(ns->root, 0, &umount_list);
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index d4036be..85a7cfd 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -119,6 +119,14 @@
 };
 
 extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+
+struct cb_recallslotargs {
+	struct sockaddr	*crsa_addr;
+	uint32_t	crsa_target_max_slots;
+};
+extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
+					  void *dummy);
+
 #endif /* CONFIG_NFS_V4_1 */
 
 extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index defa9b4..84761b5 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -143,44 +143,49 @@
  * Return success if the sequenceID is one more than what we last saw on
  * this slot, accounting for wraparound.  Increments the slot's sequence.
  *
- * We don't yet implement a duplicate request cache, so at this time
- * we will log replays, and process them as if we had not seen them before,
- * but we don't bump the sequence in the slot.  Not too worried about it,
+ * We don't yet implement a duplicate request cache, instead we set the
+ * back channel ca_maxresponsesize_cached to zero. This is OK for now
  * since we only currently implement idempotent callbacks anyway.
  *
  * We have a single slot backchannel at this time, so we don't bother
  * checking the used_slots bit array on the table.  The lower layer guarantees
  * a single outstanding callback request at a time.
  */
-static int
-validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid)
+static __be32
+validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
 {
 	struct nfs4_slot *slot;
 
 	dprintk("%s enter. slotid %d seqid %d\n",
-		__func__, slotid, seqid);
+		__func__, args->csa_slotid, args->csa_sequenceid);
 
-	if (slotid > NFS41_BC_MAX_CALLBACKS)
+	if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
 		return htonl(NFS4ERR_BADSLOT);
 
-	slot = tbl->slots + slotid;
+	slot = tbl->slots + args->csa_slotid;
 	dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
 
 	/* Normal */
-	if (likely(seqid == slot->seq_nr + 1)) {
+	if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
 		slot->seq_nr++;
 		return htonl(NFS4_OK);
 	}
 
 	/* Replay */
-	if (seqid == slot->seq_nr) {
-		dprintk("%s seqid %d is a replay - no DRC available\n",
-			__func__, seqid);
-		return htonl(NFS4_OK);
+	if (args->csa_sequenceid == slot->seq_nr) {
+		dprintk("%s seqid %d is a replay\n",
+			__func__, args->csa_sequenceid);
+		/* Signal process_op to set this error on next op */
+		if (args->csa_cachethis == 0)
+			return htonl(NFS4ERR_RETRY_UNCACHED_REP);
+
+		/* The ca_maxresponsesize_cached is 0 with no DRC */
+		else if (args->csa_cachethis == 1)
+			return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
 	}
 
 	/* Wraparound */
-	if (seqid == 1 && (slot->seq_nr + 1) == 0) {
+	if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
 		slot->seq_nr = 1;
 		return htonl(NFS4_OK);
 	}
@@ -225,27 +230,87 @@
 	return NULL;
 }
 
-/* FIXME: referring calls should be processed */
-unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
+/*
+ * For each referring call triple, check the session's slot table for
+ * a match.  If the slot is in use and the sequence numbers match, the
+ * client is still waiting for a response to the original request.
+ */
+static bool referring_call_exists(struct nfs_client *clp,
+				  uint32_t nrclists,
+				  struct referring_call_list *rclists)
+{
+	bool status = 0;
+	int i, j;
+	struct nfs4_session *session;
+	struct nfs4_slot_table *tbl;
+	struct referring_call_list *rclist;
+	struct referring_call *ref;
+
+	/*
+	 * XXX When client trunking is implemented, this becomes
+	 * a session lookup from within the loop
+	 */
+	session = clp->cl_session;
+	tbl = &session->fc_slot_table;
+
+	for (i = 0; i < nrclists; i++) {
+		rclist = &rclists[i];
+		if (memcmp(session->sess_id.data,
+			   rclist->rcl_sessionid.data,
+			   NFS4_MAX_SESSIONID_LEN) != 0)
+			continue;
+
+		for (j = 0; j < rclist->rcl_nrefcalls; j++) {
+			ref = &rclist->rcl_refcalls[j];
+
+			dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
+				"slotid %u\n", __func__,
+				((u32 *)&rclist->rcl_sessionid.data)[0],
+				((u32 *)&rclist->rcl_sessionid.data)[1],
+				((u32 *)&rclist->rcl_sessionid.data)[2],
+				((u32 *)&rclist->rcl_sessionid.data)[3],
+				ref->rc_sequenceid, ref->rc_slotid);
+
+			spin_lock(&tbl->slot_tbl_lock);
+			status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
+				  tbl->slots[ref->rc_slotid].seq_nr ==
+					ref->rc_sequenceid);
+			spin_unlock(&tbl->slot_tbl_lock);
+			if (status)
+				goto out;
+		}
+	}
+
+out:
+	return status;
+}
+
+__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 				struct cb_sequenceres *res)
 {
 	struct nfs_client *clp;
-	int i, status;
-
-	for (i = 0; i < args->csa_nrclists; i++)
-		kfree(args->csa_rclists[i].rcl_refcalls);
-	kfree(args->csa_rclists);
+	int i;
+	__be32 status;
 
 	status = htonl(NFS4ERR_BADSESSION);
 	clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
 	if (clp == NULL)
 		goto out;
 
-	status = validate_seqid(&clp->cl_session->bc_slot_table,
-				args->csa_slotid, args->csa_sequenceid);
+	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
 	if (status)
 		goto out_putclient;
 
+	/*
+	 * Check for pending referring calls.  If a match is found, a
+	 * related callback was received before the response to the original
+	 * call.
+	 */
+	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
+		status = htonl(NFS4ERR_DELAY);
+		goto out_putclient;
+	}
+
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
 	       sizeof(res->csr_sessionid));
 	res->csr_sequenceid = args->csa_sequenceid;
@@ -256,15 +321,23 @@
 out_putclient:
 	nfs_put_client(clp);
 out:
-	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
-	res->csr_status = status;
-	return res->csr_status;
+	for (i = 0; i < args->csa_nrclists; i++)
+		kfree(args->csa_rclists[i].rcl_refcalls);
+	kfree(args->csa_rclists);
+
+	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
+		res->csr_status = 0;
+	else
+		res->csr_status = status;
+	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
+		ntohl(status), ntohl(res->csr_status));
+	return status;
 }
 
-unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
 {
 	struct nfs_client *clp;
-	int status;
+	__be32 status;
 	fmode_t flags = 0;
 
 	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
@@ -289,4 +362,40 @@
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
 	return status;
 }
+
+/* Reduce the fore channel's max_slots to the target value */
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+{
+	struct nfs_client *clp;
+	struct nfs4_slot_table *fc_tbl;
+	__be32 status;
+
+	status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+	clp = nfs_find_client(args->crsa_addr, 4);
+	if (clp == NULL)
+		goto out;
+
+	dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
+		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
+		args->crsa_target_max_slots);
+
+	fc_tbl = &clp->cl_session->fc_slot_table;
+
+	status = htonl(NFS4ERR_BAD_HIGH_SLOT);
+	if (args->crsa_target_max_slots > fc_tbl->max_slots ||
+	    args->crsa_target_max_slots < 1)
+		goto out_putclient;
+
+	status = htonl(NFS4_OK);
+	if (args->crsa_target_max_slots == fc_tbl->max_slots)
+		goto out_putclient;
+
+	fc_tbl->target_max_slots = args->crsa_target_max_slots;
+	nfs41_handle_recall_slot(clp);
+out_putclient:
+	nfs_put_client(clp);	/* balance nfs_find_client */
+out:
+	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
+	return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 8e1a251..db30c0b 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -24,10 +24,14 @@
 #define CB_OP_SEQUENCE_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ + \
 					4 + 1 + 3)
 #define CB_OP_RECALLANY_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
+#define CB_OP_RECALLSLOT_RES_MAXSZ	(CB_OP_HDR_RES_MAXSZ)
 #endif /* CONFIG_NFS_V4_1 */
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
+/* Internal error code */
+#define NFS4ERR_RESOURCE_HDR	11050
+
 typedef __be32 (*callback_process_op_t)(void *, void *);
 typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
 typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
@@ -173,7 +177,7 @@
 	__be32 *p;
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
-		return htonl(NFS4ERR_RESOURCE);
+		return htonl(NFS4ERR_RESOURCE_HDR);
 	*op = ntohl(*p);
 	return 0;
 }
@@ -215,10 +219,10 @@
 
 #if defined(CONFIG_NFS_V4_1)
 
-static unsigned decode_sessionid(struct xdr_stream *xdr,
+static __be32 decode_sessionid(struct xdr_stream *xdr,
 				 struct nfs4_sessionid *sid)
 {
-	uint32_t *p;
+	__be32 *p;
 	int len = NFS4_MAX_SESSIONID_LEN;
 
 	p = read_buf(xdr, len);
@@ -229,12 +233,12 @@
 	return 0;
 }
 
-static unsigned decode_rc_list(struct xdr_stream *xdr,
+static __be32 decode_rc_list(struct xdr_stream *xdr,
 			       struct referring_call_list *rc_list)
 {
-	uint32_t *p;
+	__be32 *p;
 	int i;
-	unsigned status;
+	__be32 status;
 
 	status = decode_sessionid(xdr, &rc_list->rcl_sessionid);
 	if (status)
@@ -267,13 +271,13 @@
 	return status;
 }
 
-static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp,
+static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
 					struct xdr_stream *xdr,
 					struct cb_sequenceargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
 	int i;
-	unsigned status;
+	__be32 status;
 
 	status = decode_sessionid(xdr, &args->csa_sessionid);
 	if (status)
@@ -327,11 +331,11 @@
 	goto out;
 }
 
-static unsigned decode_recallany_args(struct svc_rqst *rqstp,
+static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr,
 				      struct cb_recallanyargs *args)
 {
-	uint32_t *p;
+	__be32 *p;
 
 	args->craa_addr = svc_addr(rqstp);
 	p = read_buf(xdr, 4);
@@ -346,6 +350,20 @@
 	return 0;
 }
 
+static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					struct cb_recallslotargs *args)
+{
+	__be32 *p;
+
+	args->crsa_addr = svc_addr(rqstp);
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_BADXDR);
+	args->crsa_target_max_slots = ntohl(*p++);
+	return 0;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
@@ -465,7 +483,7 @@
 	
 	p = xdr_reserve_space(xdr, 8);
 	if (unlikely(p == NULL))
-		return htonl(NFS4ERR_RESOURCE);
+		return htonl(NFS4ERR_RESOURCE_HDR);
 	*p++ = htonl(op);
 	*p = res;
 	return 0;
@@ -499,10 +517,10 @@
 
 #if defined(CONFIG_NFS_V4_1)
 
-static unsigned encode_sessionid(struct xdr_stream *xdr,
+static __be32 encode_sessionid(struct xdr_stream *xdr,
 				 const struct nfs4_sessionid *sid)
 {
-	uint32_t *p;
+	__be32 *p;
 	int len = NFS4_MAX_SESSIONID_LEN;
 
 	p = xdr_reserve_space(xdr, len);
@@ -513,11 +531,11 @@
 	return 0;
 }
 
-static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp,
+static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
 				       struct xdr_stream *xdr,
 				       const struct cb_sequenceres *res)
 {
-	uint32_t *p;
+	__be32 *p;
 	unsigned status = res->csr_status;
 
 	if (unlikely(status != 0))
@@ -554,6 +572,7 @@
 	case OP_CB_RECALL:
 	case OP_CB_SEQUENCE:
 	case OP_CB_RECALL_ANY:
+	case OP_CB_RECALL_SLOT:
 		*op = &callback_ops[op_nr];
 		break;
 
@@ -562,7 +581,6 @@
 	case OP_CB_NOTIFY:
 	case OP_CB_PUSH_DELEG:
 	case OP_CB_RECALLABLE_OBJ_AVAIL:
-	case OP_CB_RECALL_SLOT:
 	case OP_CB_WANTS_CANCELLED:
 	case OP_CB_NOTIFY_LOCK:
 		return htonl(NFS4ERR_NOTSUPP);
@@ -602,20 +620,18 @@
 static __be32 process_op(uint32_t minorversion, int nop,
 		struct svc_rqst *rqstp,
 		struct xdr_stream *xdr_in, void *argp,
-		struct xdr_stream *xdr_out, void *resp)
+		struct xdr_stream *xdr_out, void *resp, int* drc_status)
 {
 	struct callback_op *op = &callback_ops[0];
-	unsigned int op_nr = OP_CB_ILLEGAL;
+	unsigned int op_nr;
 	__be32 status;
 	long maxlen;
 	__be32 res;
 
 	dprintk("%s: start\n", __func__);
 	status = decode_op_hdr(xdr_in, &op_nr);
-	if (unlikely(status)) {
-		status = htonl(NFS4ERR_OP_ILLEGAL);
-		goto out;
-	}
+	if (unlikely(status))
+		return status;
 
 	dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
 		__func__, minorversion, nop, op_nr);
@@ -624,19 +640,32 @@
 				preprocess_nfs4_op(op_nr, &op);
 	if (status == htonl(NFS4ERR_OP_ILLEGAL))
 		op_nr = OP_CB_ILLEGAL;
-out:
+	if (status)
+		goto encode_hdr;
+
+	if (*drc_status) {
+		status = *drc_status;
+		goto encode_hdr;
+	}
+
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
-		if (likely(status == 0 && op->decode_args != NULL))
-			status = op->decode_args(rqstp, xdr_in, argp);
-		if (likely(status == 0 && op->process_op != NULL))
+		status = op->decode_args(rqstp, xdr_in, argp);
+		if (likely(status == 0))
 			status = op->process_op(argp, resp);
 	} else
 		status = htonl(NFS4ERR_RESOURCE);
 
+	/* Only set by OP_CB_SEQUENCE processing */
+	if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
+		*drc_status = status;
+		status = 0;
+	}
+
+encode_hdr:
 	res = encode_op_hdr(xdr_out, op_nr, status);
-	if (status == 0)
-		status = res;
+	if (unlikely(res))
+		return res;
 	if (op->encode_res != NULL && status == 0)
 		status = op->encode_res(rqstp, xdr_out, resp);
 	dprintk("%s: done, status = %d\n", __func__, ntohl(status));
@@ -652,7 +681,7 @@
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
 	__be32 *p;
-	__be32 status;
+	__be32 status, drc_status = 0;
 	unsigned int nops = 0;
 
 	dprintk("%s: start\n", __func__);
@@ -672,11 +701,18 @@
 		return rpc_system_err;
 
 	while (status == 0 && nops != hdr_arg.nops) {
-		status = process_op(hdr_arg.minorversion, nops,
-				    rqstp, &xdr_in, argp, &xdr_out, resp);
+		status = process_op(hdr_arg.minorversion, nops, rqstp,
+				    &xdr_in, argp, &xdr_out, resp, &drc_status);
 		nops++;
 	}
 
+	/* Buffer overflow in decode_ops_hdr or encode_ops_hdr. Return
+	* resource error in cb_compound status without returning op */
+	if (unlikely(status == htonl(NFS4ERR_RESOURCE_HDR))) {
+		status = htonl(NFS4ERR_RESOURCE);
+		nops--;
+	}
+
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
 	dprintk("%s: done, status = %u\n", __func__, ntohl(status));
@@ -713,6 +749,11 @@
 		.decode_args = (callback_decode_arg_t)decode_recallany_args,
 		.res_maxsize = CB_OP_RECALLANY_RES_MAXSZ,
 	},
+	[OP_CB_RECALL_SLOT] = {
+		.process_op = (callback_process_op_t)nfs4_callback_recallslot,
+		.decode_args = (callback_decode_arg_t)decode_recallslot_args,
+		.res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ,
+	},
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..2274f17 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -164,30 +164,7 @@
 	return ERR_PTR(err);
 }
 
-static void nfs4_shutdown_client(struct nfs_client *clp)
-{
 #ifdef CONFIG_NFS_V4
-	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
-		nfs4_kill_renewd(clp);
-	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
-	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
-		nfs_idmap_delete(clp);
-
-	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
-#endif
-}
-
-/*
- * Destroy the NFS4 callback service
- */
-static void nfs4_destroy_callback(struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4
-	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
-		nfs_callback_down(clp->cl_minorversion);
-#endif /* CONFIG_NFS_V4 */
-}
-
 /*
  * Clears/puts all minor version specific parts from an nfs_client struct
  * reverting it to minorversion 0.
@@ -202,18 +179,41 @@
 
 	clp->cl_call_sync = _nfs4_call_sync;
 #endif /* CONFIG_NFS_V4_1 */
-
-	nfs4_destroy_callback(clp);
 }
 
 /*
+ * Destroy the NFS4 callback service
+ */
+static void nfs4_destroy_callback(struct nfs_client *clp)
+{
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down(clp->cl_minorversion);
+}
+
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+		nfs4_kill_renewd(clp);
+	nfs4_clear_client_minor_version(clp);
+	nfs4_destroy_callback(clp);
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+		nfs_idmap_delete(clp);
+
+	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+}
+#else
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+}
+#endif /* CONFIG_NFS_V4 */
+
+/*
  * Destroy a shared client record
  */
 static void nfs_free_client(struct nfs_client *clp)
 {
 	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
 
-	nfs4_clear_client_minor_version(clp);
 	nfs4_shutdown_client(clp);
 
 	nfs_fscache_release_client_cookie(clp);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3c7f03b..a1f6b44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,7 +560,7 @@
 	desc->entry = &my_entry;
 
 	nfs_block_sillyrename(dentry);
-	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping(inode, filp->f_mapping);
 	if (res < 0)
 		goto out;
 
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 95e1ca7..3f0cd4d 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -36,6 +36,19 @@
 };
 
 
+static void nfs_dns_ent_update(struct cache_head *cnew,
+		struct cache_head *ckey)
+{
+	struct nfs_dns_ent *new;
+	struct nfs_dns_ent *key;
+
+	new = container_of(cnew, struct nfs_dns_ent, h);
+	key = container_of(ckey, struct nfs_dns_ent, h);
+
+	memcpy(&new->addr, &key->addr, key->addrlen);
+	new->addrlen = key->addrlen;
+}
+
 static void nfs_dns_ent_init(struct cache_head *cnew,
 		struct cache_head *ckey)
 {
@@ -49,8 +62,7 @@
 	new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
 	if (new->hostname) {
 		new->namelen = key->namelen;
-		memcpy(&new->addr, &key->addr, key->addrlen);
-		new->addrlen = key->addrlen;
+		nfs_dns_ent_update(cnew, ckey);
 	} else {
 		new->namelen = 0;
 		new->addrlen = 0;
@@ -234,7 +246,7 @@
 	.cache_show = nfs_dns_show,
 	.match = nfs_dns_match,
 	.init = nfs_dns_ent_init,
-	.update = nfs_dns_ent_init,
+	.update = nfs_dns_ent_update,
 	.alloc = nfs_dns_ent_alloc,
 };
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 63f2071..ae8d022 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -123,11 +123,11 @@
 			filp->f_path.dentry->d_parent->d_name.name,
 			filp->f_path.dentry->d_name.name);
 
+	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	res = nfs_check_flags(filp->f_flags);
 	if (res)
 		return res;
 
-	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	res = nfs_open(inode, filp);
 	return res;
 }
@@ -237,9 +237,9 @@
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name);
 
+	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
-	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 
 	/* Flush writes to the server and return any errors */
 	return nfs_do_fsync(ctx, inode);
@@ -262,9 +262,11 @@
 		(unsigned long) count, (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
-	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
-	if (!result)
+	if (!result) {
 		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		if (result > 0)
+			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
+	}
 	return result;
 }
 
@@ -282,8 +284,11 @@
 		(unsigned long) count, (unsigned long long) *ppos);
 
 	res = nfs_revalidate_mapping(inode, filp->f_mapping);
-	if (!res)
+	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
+		if (res > 0)
+			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
+	}
 	return res;
 }
 
@@ -596,6 +601,7 @@
 {
 	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
+	unsigned long written = 0;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
@@ -622,14 +628,18 @@
 	if (!count)
 		goto out;
 
-	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+	if (result > 0)
+		written = result;
+
 	/* Return error values for O_DSYNC and IS_SYNC() */
 	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
 		int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
 		if (err < 0)
 			result = err;
 	}
+	if (result > 0)
+		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 out:
 	return result;
 
@@ -644,6 +654,7 @@
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
+	unsigned long written = 0;
 	ssize_t ret;
 
 	dprintk("NFS splice_write(%s/%s, %lu@%llu)\n",
@@ -654,14 +665,17 @@
 	 * The combination of splice and an O_APPEND destination is disallowed.
 	 */
 
-	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
-
 	ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
+	if (ret > 0)
+		written = ret;
+
 	if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
 		int err = nfs_do_fsync(nfs_file_open_context(filp), inode);
 		if (err < 0)
 			ret = err;
 	}
+	if (ret > 0)
+		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 	return ret;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f141bde..657201a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -97,22 +97,6 @@
 	return ino;
 }
 
-int nfs_write_inode(struct inode *inode, int sync)
-{
-	int ret;
-
-	if (sync) {
-		ret = filemap_fdatawait(inode->i_mapping);
-		if (ret == 0)
-			ret = nfs_commit_inode(inode, FLUSH_SYNC);
-	} else
-		ret = nfs_commit_inode(inode, 0);
-	if (ret >= 0)
-		return 0;
-	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-	return ret;
-}
-
 void nfs_clear_inode(struct inode *inode)
 {
 	/*
@@ -130,16 +114,12 @@
  */
 int nfs_sync_mapping(struct address_space *mapping)
 {
-	int ret;
+	int ret = 0;
 
-	if (mapping->nrpages == 0)
-		return 0;
-	unmap_mapping_range(mapping, 0, 0, 0);
-	ret = filemap_write_and_wait(mapping);
-	if (ret != 0)
-		goto out;
-	ret = nfs_wb_all(mapping->host);
-out:
+	if (mapping->nrpages != 0) {
+		unmap_mapping_range(mapping, 0, 0, 0);
+		ret = nfs_wb_all(mapping->host);
+	}
 	return ret;
 }
 
@@ -511,17 +491,11 @@
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	/*
-	 * Flush out writes to the server in order to update c/mtime.
-	 *
-	 * Hold the i_mutex to suspend application writes temporarily;
-	 * this prevents long-running writing applications from blocking
-	 * nfs_wb_nocommit.
-	 */
+	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
-		mutex_lock(&inode->i_mutex);
-		nfs_wb_nocommit(inode);
-		mutex_unlock(&inode->i_mutex);
+		err = filemap_write_and_wait(inode->i_mapping);
+		if (err)
+			goto out;
 	}
 
 	/*
@@ -545,6 +519,7 @@
 		generic_fillattr(inode, stat);
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	}
+out:
 	return err;
 }
 
@@ -574,14 +549,14 @@
 	nfs_revalidate_inode(server, inode);
 }
 
-static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred)
 {
 	struct nfs_open_context *ctx;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx != NULL) {
-		ctx->path.dentry = dget(dentry);
-		ctx->path.mnt = mntget(mnt);
+		ctx->path = *path;
+		path_get(&ctx->path);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
@@ -620,11 +595,6 @@
 	__put_nfs_open_context(ctx, 0);
 }
 
-static void put_nfs_open_context_sync(struct nfs_open_context *ctx)
-{
-	__put_nfs_open_context(ctx, 1);
-}
-
 /*
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
@@ -671,7 +641,7 @@
 		spin_lock(&inode->i_lock);
 		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
 		spin_unlock(&inode->i_lock);
-		put_nfs_open_context_sync(ctx);
+		__put_nfs_open_context(ctx, filp->f_flags & O_DIRECT ? 0 : 1);
 	}
 }
 
@@ -686,7 +656,7 @@
 	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
+	ctx = alloc_nfs_open_context(&filp->f_path, cred);
 	put_rpccred(cred);
 	if (ctx == NULL)
 		return -ENOMEM;
@@ -779,7 +749,7 @@
 	return __nfs_revalidate_inode(server, inode);
 }
 
-static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
+static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	
@@ -800,49 +770,10 @@
 	return 0;
 }
 
-static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
-{
-	int ret = 0;
-
-	mutex_lock(&inode->i_mutex);
-	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) {
-		ret = nfs_sync_mapping(mapping);
-		if (ret == 0)
-			ret = nfs_invalidate_mapping_nolock(inode, mapping);
-	}
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/**
- * nfs_revalidate_mapping_nolock - Revalidate the pagecache
- * @inode - pointer to host inode
- * @mapping - pointer to mapping
- */
-int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int ret = 0;
-
-	if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-			|| nfs_attribute_timeout(inode) || NFS_STALE(inode)) {
-		ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-		if (ret < 0)
-			goto out;
-	}
-	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
-		ret = nfs_invalidate_mapping_nolock(inode, mapping);
-out:
-	return ret;
-}
-
 /**
  * nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
- *
- * This version of the function will take the inode->i_mutex and attempt to
- * flush out all dirty data if it needs to invalidate the page cache.
  */
 int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
@@ -1420,6 +1351,7 @@
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	nfsi->npages = 0;
+	nfsi->ncommit = 0;
 	atomic_set(&nfsi->silly_count, 1);
 	INIT_HLIST_HEAD(&nfsi->silly_list);
 	init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 29e464d..11f82f0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -211,7 +211,7 @@
 extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
-extern int nfs_write_inode(struct inode *,int);
+extern int nfs_write_inode(struct inode *, struct writeback_control *);
 extern void nfs_clear_inode(struct inode *);
 #ifdef CONFIG_NFS_V4
 extern void nfs4_clear_inode(struct inode *);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3f8881d..24992f0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -22,14 +22,14 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-/* A wrapper to handle the EJUKEBOX error message */
+/* A wrapper to handle the EJUKEBOX and EKEYEXPIRED error messages */
 static int
 nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 {
 	int res;
 	do {
 		res = rpc_call_sync(clnt, msg, flags);
-		if (res != -EJUKEBOX)
+		if (res != -EJUKEBOX && res != -EKEYEXPIRED)
 			break;
 		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
@@ -42,9 +42,10 @@
 static int
 nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
-	if (task->tk_status != -EJUKEBOX)
+	if (task->tk_status != -EJUKEBOX && task->tk_status != -EKEYEXPIRED)
 		return 0;
-	nfs_inc_stats(inode, NFSIOS_DELAY);
+	if (task->tk_status == -EJUKEBOX)
+		nfs_inc_stats(inode, NFSIOS_DELAY);
 	task->tk_status = 0;
 	rpc_restart_call(task);
 	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 0c6fda3..a187200 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -46,6 +46,7 @@
 	NFS4CLNT_DELEGRETURN,
 	NFS4CLNT_SESSION_RESET,
 	NFS4CLNT_SESSION_DRAINING,
+	NFS4CLNT_RECALL_SLOT,
 };
 
 /*
@@ -280,6 +281,7 @@
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
 extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
+extern void nfs41_handle_recall_slot(struct nfs_client *clp);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 375f0fa..eda74c4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -281,6 +281,7 @@
 			}
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+		case -EKEYEXPIRED:
 			ret = nfs4_delay(server->client, &exception->timeout);
 			if (ret != 0)
 				break;
@@ -418,7 +419,8 @@
 			clp->cl_last_renewal = timestamp;
 		spin_unlock(&clp->cl_lock);
 		/* Check sequence flags */
-		nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+		if (atomic_read(&clp->cl_count) > 1)
+			nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
 	}
 out:
 	/* The session may be reset by one of the error handlers. */
@@ -724,8 +726,8 @@
 	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (p->o_arg.seqid == NULL)
 		goto err_free;
-	p->path.mnt = mntget(path->mnt);
-	p->path.dentry = dget(path->dentry);
+	path_get(path);
+	p->path = *path;
 	p->dir = parent;
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
@@ -1163,7 +1165,7 @@
 	int err;
 	do {
 		err = _nfs4_do_open_reclaim(ctx, state);
-		if (err != -NFS4ERR_DELAY)
+		if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
 			break;
 		nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
@@ -1582,6 +1584,7 @@
 			goto out;
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+		case -EKEYEXPIRED:
 			nfs4_handle_exception(server, err, &exception);
 			err = 0;
 		}
@@ -1944,8 +1947,8 @@
 	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
 	calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
-	calldata->path.mnt = mntget(path->mnt);
-	calldata->path.dentry = dget(path->dentry);
+	path_get(path);
+	calldata->path = *path;
 
 	msg.rpc_argp = &calldata->arg,
 	msg.rpc_resp = &calldata->res,
@@ -3145,10 +3148,19 @@
  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
  * standalone procedure for queueing an asynchronous RENEW.
  */
+static void nfs4_renew_release(void *data)
+{
+	struct nfs_client *clp = data;
+
+	if (atomic_read(&clp->cl_count) > 1)
+		nfs4_schedule_state_renewal(clp);
+	nfs_put_client(clp);
+}
+
 static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
-	struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
-	unsigned long timestamp = (unsigned long)data;
+	struct nfs_client *clp = data;
+	unsigned long timestamp = task->tk_start;
 
 	if (task->tk_status < 0) {
 		/* Unless we're shutting down, schedule state recovery! */
@@ -3164,6 +3176,7 @@
 
 static const struct rpc_call_ops nfs4_renew_ops = {
 	.rpc_call_done = nfs4_renew_done,
+	.rpc_release = nfs4_renew_release,
 };
 
 int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3174,8 +3187,10 @@
 		.rpc_cred	= cred,
 	};
 
+	if (!atomic_inc_not_zero(&clp->cl_count))
+		return -EIO;
 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
-			&nfs4_renew_ops, (void *)jiffies);
+			&nfs4_renew_ops, clp);
 }
 
 int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3452,6 +3467,7 @@
 			if (server)
 				nfs_inc_server_stats(server, NFSIOS_DELAY);
 		case -NFS4ERR_GRACE:
+		case -EKEYEXPIRED:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -3564,6 +3580,7 @@
 			case -NFS4ERR_RESOURCE:
 				/* The IBM lawyers misread another document! */
 			case -NFS4ERR_DELAY:
+			case -EKEYEXPIRED:
 				err = nfs4_delay(clp->cl_rpcclient, &timeout);
 		}
 	} while (err == 0);
@@ -4179,7 +4196,7 @@
 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
 			return 0;
 		err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
-		if (err != -NFS4ERR_DELAY)
+		if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
 			break;
 		nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
@@ -4204,6 +4221,7 @@
 			goto out;
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+		case -EKEYEXPIRED:
 			nfs4_handle_exception(server, err, &exception);
 			err = 0;
 		}
@@ -4355,6 +4373,7 @@
 				err = 0;
 				goto out;
 			case -NFS4ERR_DELAY:
+			case -EKEYEXPIRED:
 				break;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -4500,7 +4519,7 @@
 
 		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
 
-		if (status != NFS4ERR_CLID_INUSE)
+		if (status != -NFS4ERR_CLID_INUSE)
 			break;
 
 		if (signalled())
@@ -4554,6 +4573,7 @@
 	switch (task->tk_status) {
 	case -NFS4ERR_DELAY:
 	case -NFS4ERR_GRACE:
+	case -EKEYEXPIRED:
 		dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
 		rpc_delay(task, NFS4_POLL_RETRY_MIN);
 		task->tk_status = 0;
@@ -4611,26 +4631,32 @@
 /*
  * Reset a slot table
  */
-static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots,
-		int old_max_slots, int ivalue)
+static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
+				 int ivalue)
 {
+	struct nfs4_slot *new = NULL;
 	int i;
 	int ret = 0;
 
-	dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl);
+	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
+		max_reqs, tbl->max_slots);
 
-	/*
-	 * Until we have dynamic slot table adjustment, insist
-	 * upon the same slot table size
-	 */
-	if (max_slots != old_max_slots) {
-		dprintk("%s reset slot table does't match old\n",
-			__func__);
-		ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */
-		goto out;
+	/* Does the newly negotiated max_reqs match the existing slot table? */
+	if (max_reqs != tbl->max_slots) {
+		ret = -ENOMEM;
+		new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
+			      GFP_KERNEL);
+		if (!new)
+			goto out;
+		ret = 0;
+		kfree(tbl->slots);
 	}
 	spin_lock(&tbl->slot_tbl_lock);
-	for (i = 0; i < max_slots; ++i)
+	if (new) {
+		tbl->slots = new;
+		tbl->max_slots = max_reqs;
+	}
+	for (i = 0; i < tbl->max_slots; ++i)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
 	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
@@ -4648,16 +4674,12 @@
 	int status;
 
 	status = nfs4_reset_slot_table(&session->fc_slot_table,
-			session->fc_attrs.max_reqs,
-			session->fc_slot_table.max_slots,
-			1);
+			session->fc_attrs.max_reqs, 1);
 	if (status)
 		return status;
 
 	status = nfs4_reset_slot_table(&session->bc_slot_table,
-			session->bc_attrs.max_reqs,
-			session->bc_slot_table.max_slots,
-			0);
+			session->bc_attrs.max_reqs, 0);
 	return status;
 }
 
@@ -4798,16 +4820,14 @@
 	args->fc_attrs.headerpadsz = 0;
 	args->fc_attrs.max_rqst_sz = mxrqst_sz;
 	args->fc_attrs.max_resp_sz = mxresp_sz;
-	args->fc_attrs.max_resp_sz_cached = mxresp_sz;
 	args->fc_attrs.max_ops = NFS4_MAX_OPS;
 	args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs;
 
 	dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u "
-		"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
+		"max_ops=%u max_reqs=%u\n",
 		__func__,
 		args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz,
-		args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops,
-		args->fc_attrs.max_reqs);
+		args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
 
 	/* Back channel attributes */
 	args->bc_attrs.headerpadsz = 0;
@@ -5016,7 +5036,16 @@
 				       &res, args.sa_cache_this, 1);
 }
 
-void nfs41_sequence_call_done(struct rpc_task *task, void *data)
+static void nfs41_sequence_release(void *data)
+{
+	struct nfs_client *clp = (struct nfs_client *)data;
+
+	if (atomic_read(&clp->cl_count) > 1)
+		nfs4_schedule_state_renewal(clp);
+	nfs_put_client(clp);
+}
+
+static void nfs41_sequence_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_client *clp = (struct nfs_client *)data;
 
@@ -5024,6 +5053,8 @@
 
 	if (task->tk_status < 0) {
 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
+		if (atomic_read(&clp->cl_count) == 1)
+			goto out;
 
 		if (_nfs4_async_handle_error(task, NULL, clp, NULL)
 								== -EAGAIN) {
@@ -5032,7 +5063,7 @@
 		}
 	}
 	dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred);
-
+out:
 	kfree(task->tk_msg.rpc_argp);
 	kfree(task->tk_msg.rpc_resp);
 
@@ -5057,6 +5088,7 @@
 static const struct rpc_call_ops nfs41_sequence_ops = {
 	.rpc_call_done = nfs41_sequence_call_done,
 	.rpc_call_prepare = nfs41_sequence_prepare,
+	.rpc_release = nfs41_sequence_release,
 };
 
 static int nfs41_proc_async_sequence(struct nfs_client *clp,
@@ -5069,12 +5101,13 @@
 		.rpc_cred = cred,
 	};
 
+	if (!atomic_inc_not_zero(&clp->cl_count))
+		return -EIO;
 	args = kzalloc(sizeof(*args), GFP_KERNEL);
-	if (!args)
-		return -ENOMEM;
 	res = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (!res) {
+	if (!args || !res) {
 		kfree(args);
+		nfs_put_client(clp);
 		return -ENOMEM;
 	}
 	res->sr_slotid = NFS4_MAX_SLOT_TABLE;
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 0156c01..d87f103 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -36,11 +36,6 @@
  * as an rpc_task, not a real kernel thread, so it always runs in rpciod's
  * context.  There is one renewd per nfs_server.
  *
- * TODO: If the send queue gets backlogged (e.g., if the server goes down),
- * we will keep filling the queue with periodic RENEW requests.  We need a
- * mechanism for ensuring that if renewd successfully sends off a request,
- * then it only wakes up when the request is finished.  Maybe use the
- * child task framework of the RPC layer?
  */
 
 #include <linux/mm.h>
@@ -63,7 +58,7 @@
 	struct nfs_client *clp =
 		container_of(work, struct nfs_client, cl_renewd.work);
 	struct rpc_cred *cred;
-	long lease, timeout;
+	long lease;
 	unsigned long last, now;
 
 	ops = nfs4_state_renewal_ops[clp->cl_minorversion];
@@ -75,7 +70,6 @@
 	lease = clp->cl_lease_time;
 	last = clp->cl_last_renewal;
 	now = jiffies;
-	timeout = (2 * lease) / 3 + (long)last - (long)now;
 	/* Are we close to a lease timeout? */
 	if (time_after(now, last + lease/3)) {
 		cred = ops->get_state_renewal_cred_locked(clp);
@@ -90,19 +84,15 @@
 			/* Queue an asynchronous RENEW. */
 			ops->sched_state_renewal(clp, cred);
 			put_rpccred(cred);
+			goto out_exp;
 		}
-		timeout = (2 * lease) / 3;
-		spin_lock(&clp->cl_lock);
-	} else
+	} else {
 		dprintk("%s: failed to call renewd. Reason: lease not expired \n",
 				__func__);
-	if (timeout < 5 * HZ)    /* safeguard */
-		timeout = 5 * HZ;
-	dprintk("%s: requeueing work. Lease period = %ld\n",
-			__func__, (timeout + HZ - 1) / HZ);
-	cancel_delayed_work(&clp->cl_renewd);
-	schedule_delayed_work(&clp->cl_renewd, timeout);
-	spin_unlock(&clp->cl_lock);
+		spin_unlock(&clp->cl_lock);
+	}
+	nfs4_schedule_state_renewal(clp);
+out_exp:
 	nfs_expire_unreferenced_delegations(clp);
 out:
 	dprintk("%s: done\n", __func__);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c1e2733..6c5ed51 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1249,26 +1249,65 @@
 }
 
 #ifdef CONFIG_NFS_V4_1
+void nfs41_handle_recall_slot(struct nfs_client *clp)
+{
+	set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+	nfs4_schedule_state_recovery(clp);
+}
+
+static void nfs4_reset_all_state(struct nfs_client *clp)
+{
+	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
+		clp->cl_boot_time = CURRENT_TIME;
+		nfs4_state_start_reclaim_nograce(clp);
+		nfs4_schedule_state_recovery(clp);
+	}
+}
+
+static void nfs41_handle_server_reboot(struct nfs_client *clp)
+{
+	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
+		nfs4_state_start_reclaim_reboot(clp);
+		nfs4_schedule_state_recovery(clp);
+	}
+}
+
+static void nfs41_handle_state_revoked(struct nfs_client *clp)
+{
+	/* Temporary */
+	nfs4_reset_all_state(clp);
+}
+
+static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
+{
+	/* This will need to handle layouts too */
+	nfs_expire_all_delegations(clp);
+}
+
+static void nfs41_handle_cb_path_down(struct nfs_client *clp)
+{
+	nfs_expire_all_delegations(clp);
+	if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
+		nfs4_schedule_state_recovery(clp);
+}
+
 void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
 {
 	if (!flags)
 		return;
-	else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) {
-		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-		nfs4_state_start_reclaim_reboot(clp);
-		nfs4_schedule_state_recovery(clp);
-	} else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
+	else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
+		nfs41_handle_server_reboot(clp);
+	else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
 			    SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
 			    SEQ4_STATUS_ADMIN_STATE_REVOKED |
-			    SEQ4_STATUS_RECALLABLE_STATE_REVOKED |
-			    SEQ4_STATUS_LEASE_MOVED)) {
-		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-		nfs4_state_start_reclaim_nograce(clp);
-		nfs4_schedule_state_recovery(clp);
-	} else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+			    SEQ4_STATUS_LEASE_MOVED))
+		nfs41_handle_state_revoked(clp);
+	else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
+		nfs41_handle_recallable_state_revoked(clp);
+	else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
 			    SEQ4_STATUS_BACKCHANNEL_FAULT |
 			    SEQ4_STATUS_CB_PATH_DOWN_SESSION))
-		nfs_expire_all_delegations(clp);
+		nfs41_handle_cb_path_down(clp);
 }
 
 static int nfs4_reset_session(struct nfs_client *clp)
@@ -1285,23 +1324,52 @@
 
 	memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
 	status = nfs4_proc_create_session(clp);
-	if (status)
+	if (status) {
 		status = nfs4_recovery_handle_error(clp, status);
+		goto out;
+	}
+	/* create_session negotiated new slot table */
+	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
 
-out:
-	/*
-	 * Let the state manager reestablish state
-	 */
-	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
-	    status == 0)
+	 /* Let the state manager reestablish state */
+	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		nfs41_setup_state_renewal(clp);
-
+out:
 	return status;
 }
 
+static int nfs4_recall_slot(struct nfs_client *clp)
+{
+	struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table;
+	struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs;
+	struct nfs4_slot *new, *old;
+	int i;
+
+	nfs4_begin_drain_session(clp);
+	new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
+		      GFP_KERNEL);
+        if (!new)
+		return -ENOMEM;
+
+	spin_lock(&fc_tbl->slot_tbl_lock);
+	for (i = 0; i < fc_tbl->target_max_slots; i++)
+		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
+	old = fc_tbl->slots;
+	fc_tbl->slots = new;
+	fc_tbl->max_slots = fc_tbl->target_max_slots;
+	fc_tbl->target_max_slots = 0;
+	fc_attrs->max_reqs = fc_tbl->max_slots;
+	spin_unlock(&fc_tbl->slot_tbl_lock);
+
+	kfree(old);
+	nfs4_end_drain_session(clp);
+	return 0;
+}
+
 #else /* CONFIG_NFS_V4_1 */
 static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
 static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
+static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
 #endif /* CONFIG_NFS_V4_1 */
 
 /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1314,6 +1382,7 @@
 		case -NFS4ERR_DELAY:
 		case -NFS4ERR_CLID_INUSE:
 		case -EAGAIN:
+		case -EKEYEXPIRED:
 			break;
 
 		case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
@@ -1397,6 +1466,15 @@
 			nfs_client_return_marked_delegations(clp);
 			continue;
 		}
+		/* Recall session slots */
+		if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
+		   && nfs4_has_session(clp)) {
+			status = nfs4_recall_slot(clp);
+			if (status < 0)
+				goto out_error;
+			continue;
+		}
+
 
 		nfs4_clear_state_manager_bit(clp);
 		/* Did we race with an attempt to give us more work? */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5cd5184..4d338be 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1578,6 +1578,14 @@
 	char machine_name[NFS4_MAX_MACHINE_NAME_LEN];
 	uint32_t len;
 	struct nfs_client *clp = args->client;
+	u32 max_resp_sz_cached;
+
+	/*
+	 * Assumes OPEN is the biggest non-idempotent compound.
+	 * 2 is the verifier.
+	 */
+	max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE +
+			      RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT;
 
 	len = scnprintf(machine_name, sizeof(machine_name), "%s",
 			clp->cl_ipaddr);
@@ -1592,7 +1600,7 @@
 	*p++ = cpu_to_be32(args->fc_attrs.headerpadsz);	/* header padding size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz);	/* max req size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_resp_sz);	/* max resp size */
-	*p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
+	*p++ = cpu_to_be32(max_resp_sz_cached);		/* Max resp sz cached */
 	*p++ = cpu_to_be32(args->fc_attrs.max_ops);	/* max operations */
 	*p++ = cpu_to_be32(args->fc_attrs.max_reqs);	/* max requests */
 	*p++ = cpu_to_be32(0);				/* rdmachannel_attrs */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ef58385..c752d94 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -47,6 +47,39 @@
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 /*
+ * wrapper to handle the -EKEYEXPIRED error message. This should generally
+ * only happen if using krb5 auth and a user's TGT expires. NFSv2 doesn't
+ * support the NFSERR_JUKEBOX error code, but we handle this situation in the
+ * same way that we handle that error with NFSv3.
+ */
+static int
+nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+{
+	int res;
+	do {
+		res = rpc_call_sync(clnt, msg, flags);
+		if (res != -EKEYEXPIRED)
+			break;
+		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+		res = -ERESTARTSYS;
+	} while (!fatal_signal_pending(current));
+	return res;
+}
+
+#define rpc_call_sync(clnt, msg, flags)	nfs_rpc_wrapper(clnt, msg, flags)
+
+static int
+nfs_async_handle_expired_key(struct rpc_task *task)
+{
+	if (task->tk_status != -EKEYEXPIRED)
+		return 0;
+	task->tk_status = 0;
+	rpc_restart_call(task);
+	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
+	return 1;
+}
+
+/*
  * Bare-bones access to getattr: this is for nfs_read_super.
  */
 static int
@@ -307,6 +340,8 @@
 
 static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
+	if (nfs_async_handle_expired_key(task))
+		return 0;
 	nfs_mark_for_revalidate(dir);
 	return 1;
 }
@@ -560,6 +595,9 @@
 
 static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
+	if (nfs_async_handle_expired_key(task))
+		return -EAGAIN;
+
 	nfs_invalidate_atime(data->inode);
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
@@ -579,6 +617,9 @@
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
+	if (nfs_async_handle_expired_key(task))
+		return -EAGAIN;
+
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
 	return 0;
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 412738d..2ea9e5c 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -50,7 +50,7 @@
 	struct page *page;
 	void *err;
 
-	err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping));
+	err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping));
 	if (err)
 		goto read_failed;
 	page = read_cache_page(&inode->i_data, 0,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d63d964..53ff70e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -438,6 +438,7 @@
 	radix_tree_tag_set(&nfsi->nfs_page_tree,
 			req->wb_index,
 			NFS_PAGE_TAG_COMMIT);
+	nfsi->ncommit++;
 	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
@@ -501,57 +502,6 @@
 }
 #endif
 
-/*
- * Wait for a request to complete.
- *
- * Interruptible by fatal signals only.
- */
-static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_page *req;
-	pgoff_t idx_end, next;
-	unsigned int		res = 0;
-	int			error;
-
-	if (npages == 0)
-		idx_end = ~0;
-	else
-		idx_end = idx_start + npages - 1;
-
-	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
-		if (req->wb_index > idx_end)
-			break;
-
-		next = req->wb_index + 1;
-		BUG_ON(!NFS_WBACK_BUSY(req));
-
-		kref_get(&req->wb_kref);
-		spin_unlock(&inode->i_lock);
-		error = nfs_wait_on_request(req);
-		nfs_release_request(req);
-		spin_lock(&inode->i_lock);
-		if (error < 0)
-			return error;
-		res++;
-	}
-	return res;
-}
-
-static void nfs_cancel_commit_list(struct list_head *head)
-{
-	struct nfs_page *req;
-
-	while(!list_empty(head)) {
-		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_clear_request_commit(req);
-		nfs_inode_remove_request(req);
-		nfs_unlock_request(req);
-	}
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static int
 nfs_need_commit(struct nfs_inode *nfsi)
@@ -573,11 +523,17 @@
 nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int ret;
 
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
-	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
+	if (ret > 0)
+		nfsi->ncommit -= ret;
+	if (nfs_need_commit(NFS_I(inode)))
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
 }
 #else
 static inline int nfs_need_commit(struct nfs_inode *nfsi)
@@ -642,9 +598,10 @@
 		spin_lock(&inode->i_lock);
 	}
 
-	if (nfs_clear_request_commit(req))
-		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
-				req->wb_index, NFS_PAGE_TAG_COMMIT);
+	if (nfs_clear_request_commit(req) &&
+			radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
+				req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
+		NFS_I(inode)->ncommit--;
 
 	/* Okay, the request matches. Update the region */
 	if (offset < req->wb_offset) {
@@ -1391,7 +1348,7 @@
 	.rpc_release = nfs_commit_release,
 };
 
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
 	int res;
@@ -1406,92 +1363,51 @@
 	}
 	return res;
 }
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int flags = FLUSH_SYNC;
+	int ret = 0;
+
+	/* Don't commit yet if this is a non-blocking flush and there are
+	 * lots of outstanding writes for this mapping.
+	 */
+	if (wbc->sync_mode == WB_SYNC_NONE &&
+	    nfsi->ncommit <= (nfsi->npages >> 1))
+		goto out_mark_dirty;
+
+	if (wbc->nonblocking || wbc->for_background)
+		flags = 0;
+	ret = nfs_commit_inode(inode, flags);
+	if (ret >= 0) {
+		if (wbc->sync_mode == WB_SYNC_NONE) {
+			if (ret < wbc->nr_to_write)
+				wbc->nr_to_write -= ret;
+			else
+				wbc->nr_to_write = 0;
+		}
+		return 0;
+	}
+out_mark_dirty:
+	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	return ret;
+}
 #else
-static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+static int nfs_commit_inode(struct inode *inode, int how)
+{
+	return 0;
+}
+
+static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
 {
 	return 0;
 }
 #endif
 
-long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
+int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	struct inode *inode = mapping->host;
-	pgoff_t idx_start, idx_end;
-	unsigned int npages = 0;
-	LIST_HEAD(head);
-	int nocommit = how & FLUSH_NOCOMMIT;
-	long pages, ret;
-
-	/* FIXME */
-	if (wbc->range_cyclic)
-		idx_start = 0;
-	else {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (idx_end > idx_start) {
-			pgoff_t l_npages = 1 + idx_end - idx_start;
-			npages = l_npages;
-			if (sizeof(npages) != sizeof(l_npages) &&
-					(pgoff_t)npages != l_npages)
-				npages = 0;
-		}
-	}
-	how &= ~FLUSH_NOCOMMIT;
-	spin_lock(&inode->i_lock);
-	do {
-		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
-		if (ret != 0)
-			continue;
-		if (nocommit)
-			break;
-		pages = nfs_scan_commit(inode, &head, idx_start, npages);
-		if (pages == 0)
-			break;
-		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&inode->i_lock);
-			nfs_cancel_commit_list(&head);
-			ret = pages;
-			spin_lock(&inode->i_lock);
-			continue;
-		}
-		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&inode->i_lock);
-		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&inode->i_lock);
-
-	} while (ret >= 0);
-	spin_unlock(&inode->i_lock);
-	return ret;
-}
-
-static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
-{
-	int ret;
-
-	ret = nfs_writepages(mapping, wbc);
-	if (ret < 0)
-		goto out;
-	ret = nfs_sync_mapping_wait(mapping, wbc, how);
-	if (ret < 0)
-		goto out;
-	return 0;
-out:
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-	return ret;
-}
-
-/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
-static int nfs_write_mapping(struct address_space *mapping, int how)
-{
-	struct writeback_control wbc = {
-		.bdi = mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = 0,
-		.range_end = LLONG_MAX,
-	};
-
-	return __nfs_write_mapping(mapping, &wbc, how);
+	return nfs_commit_unstable_pages(inode, wbc);
 }
 
 /*
@@ -1499,37 +1415,26 @@
  */
 int nfs_wb_all(struct inode *inode)
 {
-	return nfs_write_mapping(inode->i_mapping, 0);
-}
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = LONG_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+	};
 
-int nfs_wb_nocommit(struct inode *inode)
-{
-	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
+	return sync_inode(inode, &wbc);
 }
 
 int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 {
 	struct nfs_page *req;
-	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-	};
 	int ret = 0;
 
 	BUG_ON(!PageLocked(page));
 	for (;;) {
 		req = nfs_page_find_request(page);
 		if (req == NULL)
-			goto out;
-		if (test_bit(PG_CLEAN, &req->wb_flags)) {
-			nfs_release_request(req);
 			break;
-		}
 		if (nfs_lock_request_dontget(req)) {
 			nfs_inode_remove_request(req);
 			/*
@@ -1543,52 +1448,52 @@
 		ret = nfs_wait_on_request(req);
 		nfs_release_request(req);
 		if (ret < 0)
-			goto out;
-	}
-	if (!PagePrivate(page))
-		return 0;
-	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
-out:
-	return ret;
-}
-
-static int nfs_wb_page_priority(struct inode *inode, struct page *page,
-				int how)
-{
-	loff_t range_start = page_offset(page);
-	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
-	struct writeback_control wbc = {
-		.bdi = page->mapping->backing_dev_info,
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = range_start,
-		.range_end = range_end,
-	};
-	int ret;
-
-	do {
-		if (clear_page_dirty_for_io(page)) {
-			ret = nfs_writepage_locked(page, &wbc);
-			if (ret < 0)
-				goto out_error;
-		} else if (!PagePrivate(page))
 			break;
-		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
-		if (ret < 0)
-			goto out_error;
-	} while (PagePrivate(page));
-	return 0;
-out_error:
-	__mark_inode_dirty(inode, I_DIRTY_PAGES);
+	}
 	return ret;
 }
 
 /*
  * Write back all requests on one page - we do this before reading it.
  */
-int nfs_wb_page(struct inode *inode, struct page* page)
+int nfs_wb_page(struct inode *inode, struct page *page)
 {
-	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
+	loff_t range_start = page_offset(page);
+	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+	struct nfs_page *req;
+	int need_commit;
+	int ret;
+
+	while(PagePrivate(page)) {
+		if (clear_page_dirty_for_io(page)) {
+			ret = nfs_writepage_locked(page, &wbc);
+			if (ret < 0)
+				goto out_error;
+		}
+		req = nfs_find_and_lock_request(page);
+		if (!req)
+			break;
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
+			goto out_error;
+		}
+		need_commit = test_bit(PG_CLEAN, &req->wb_flags);
+		nfs_clear_page_tag_locked(req);
+		if (need_commit) {
+			ret = nfs_commit_inode(inode, FLUSH_SYNC);
+			if (ret < 0)
+				goto out_error;
+		}
+	}
+	return 0;
+out_error:
+	return ret;
 }
 
 #ifdef CONFIG_MIGRATION
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index d3854d9..bf9cbd2 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -36,10 +36,9 @@
 		return ERR_PTR(error);
 
 	if (flags == O_RDWR)
-		error = may_open(&nd.path, MAY_READ|MAY_WRITE,
-					   FMODE_READ|FMODE_WRITE);
+		error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
 	else
-		error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
+		error = may_open(&nd.path, MAY_WRITE, flags);
 
 	if (!error)
 		return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a8587e9..bbf72d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2121,9 +2121,15 @@
 		 * and this is the root of a cross-mounted filesystem.
 		 */
 		if (ignore_crossmnt == 0 &&
-		    exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) {
-			err = vfs_getattr(exp->ex_path.mnt->mnt_parent,
-				exp->ex_path.mnt->mnt_mountpoint, &stat);
+		    dentry == exp->ex_path.mnt->mnt_root) {
+			struct path path = exp->ex_path;
+			path_get(&path);
+			while (follow_up(&path)) {
+				if (path.dentry != path.mnt->mnt_root)
+					break;
+			}
+			err = vfs_getattr(path.mnt, path.dentry, &stat);
+			path_put(&path);
 			if (err)
 				goto out_nfserr;
 		}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8715d19..8eca17d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -20,7 +20,6 @@
 #include <linux/fcntl.h>
 #include <linux/namei.h>
 #include <linux/delay.h>
-#include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
@@ -361,7 +360,7 @@
 		 * If we are changing the size of the file, then
 		 * we need to break all leases.
 		 */
-		host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+		host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
 		if (host_err == -EWOULDBLOCK)
 			host_err = -ETIMEDOUT;
 		if (host_err) /* ENOMEM or EWOULDBLOCK */
@@ -377,7 +376,6 @@
 			put_write_access(inode);
 			goto out_nfserr;
 		}
-		vfs_dq_init(inode);
 	}
 
 	/* sanitize the mode change */
@@ -734,7 +732,7 @@
 	 * Check to see if there are any leases on this file.
 	 * This may block while leases are broken.
 	 */
-	host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
+	host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
 	if (host_err == -EWOULDBLOCK)
 		host_err = -ETIMEDOUT;
 	if (host_err) /* NOMEM or WOULDBLOCK */
@@ -745,8 +743,6 @@
 			flags = O_RDWR|O_LARGEFILE;
 		else
 			flags = O_WRONLY|O_LARGEFILE;
-
-		vfs_dq_init(inode);
 	}
 	*filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
 			    flags, current_cred());
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 76d803e..0092840 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -224,7 +224,7 @@
  * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller.
  */
 static int
-nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de)
+nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de)
 {
 	if (len != de->name_len)
 		return 0;
@@ -349,11 +349,11 @@
  * Entry is guaranteed to be valid.
  */
 struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *dir, struct dentry *dentry,
+nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
 		 struct page **res_page)
 {
-	const char *name = dentry->d_name.name;
-	int namelen = dentry->d_name.len;
+	const unsigned char *name = qstr->name;
+	int namelen = qstr->len;
 	unsigned reclen = NILFS_DIR_REC_LEN(namelen);
 	unsigned long start, n;
 	unsigned long npages = dir_pages(dir);
@@ -424,13 +424,13 @@
 	return de;
 }
 
-ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry)
+ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
 {
 	ino_t res = 0;
 	struct nilfs_dir_entry *de;
 	struct page *page;
 
-	de = nilfs_find_entry(dir, dentry, &page);
+	de = nilfs_find_entry(dir, qstr, &page);
 	if (de) {
 		res = le64_to_cpu(de->inode);
 		kunmap(page);
@@ -465,7 +465,7 @@
 int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	int namelen = dentry->d_name.len;
 	unsigned chunk_size = nilfs_chunk_size(dir);
 	unsigned reclen = NILFS_DIR_REC_LEN(namelen);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 07ba838..ad6ed2c 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -67,7 +67,7 @@
 	if (dentry->d_name.len > NILFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	ino = nilfs_inode_by_name(dir, dentry);
+	ino = nilfs_inode_by_name(dir, &dentry->d_name);
 	inode = NULL;
 	if (ino) {
 		inode = nilfs_iget(dir->i_sb, ino);
@@ -81,10 +81,7 @@
 {
 	unsigned long ino;
 	struct inode *inode;
-	struct dentry dotdot;
-
-	dotdot.d_name.name = "..";
-	dotdot.d_name.len = 2;
+	struct qstr dotdot = {.name = "..", .len = 2};
 
 	ino = nilfs_inode_by_name(child->d_inode, &dotdot);
 	if (!ino)
@@ -296,7 +293,7 @@
 	int err;
 
 	err = -ENOENT;
-	de = nilfs_find_entry(dir, dentry, &page);
+	de = nilfs_find_entry(dir, &dentry->d_name, &page);
 	if (!de)
 		goto out;
 
@@ -389,7 +386,7 @@
 		return err;
 
 	err = -ENOENT;
-	old_de = nilfs_find_entry(old_dir, old_dentry, &old_page);
+	old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;
 
@@ -409,7 +406,7 @@
 			goto out_dir;
 
 		err = -ENOENT;
-		new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
+		new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
 		if (!new_de)
 			goto out_dir;
 		inc_nlink(old_inode);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 4da6f67..8723e5b 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -217,10 +217,10 @@
 
 /* dir.c */
 extern int nilfs_add_link(struct dentry *, struct inode *);
-extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *);
+extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
 extern int nilfs_make_empty(struct inode *, struct inode *);
 extern struct nilfs_dir_entry *
-nilfs_find_entry(struct inode *, struct dentry *, struct page **);
+nilfs_find_entry(struct inode *, const struct qstr *, struct page **);
 extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
 extern int nilfs_empty_dir(struct inode *);
 extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a94e8bd..472cdf2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -29,14 +29,12 @@
 #include <linux/init.h> /* module_init */
 #include <linux/inotify.h>
 #include <linux/kernel.h> /* roundup() */
-#include <linux/magic.h> /* superblock magic number */
-#include <linux/mount.h> /* mntget */
 #include <linux/namei.h> /* LOOKUP_FOLLOW */
-#include <linux/path.h> /* struct path */
 #include <linux/sched.h> /* struct user */
 #include <linux/slab.h> /* struct kmem_cache */
 #include <linux/syscalls.h>
 #include <linux/types.h>
+#include <linux/anon_inodes.h>
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
@@ -45,8 +43,6 @@
 
 #include <asm/ioctls.h>
 
-static struct vfsmount *inotify_mnt __read_mostly;
-
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
 static int inotify_max_queued_events __read_mostly;
@@ -645,9 +641,7 @@
 {
 	struct fsnotify_group *group;
 	struct user_struct *user;
-	struct file *filp;
-	struct path path;
-	int fd, ret;
+	int ret;
 
 	/* Check the IN_* constants for consistency.  */
 	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
@@ -656,10 +650,6 @@
 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
 		return -EINVAL;
 
-	fd = get_unused_fd_flags(flags & O_CLOEXEC);
-	if (fd < 0)
-		return fd;
-
 	user = get_current_user();
 	if (unlikely(atomic_read(&user->inotify_devs) >=
 			inotify_max_user_instances)) {
@@ -676,27 +666,14 @@
 
 	atomic_inc(&user->inotify_devs);
 
-	path.mnt = inotify_mnt;
-	path.dentry = inotify_mnt->mnt_root;
-	path_get(&path);
-	filp = alloc_file(&path, FMODE_READ, &inotify_fops);
-	if (!filp)
-		goto Enfile;
+	ret = anon_inode_getfd("inotify", &inotify_fops, group,
+				  O_RDONLY | flags);
+	if (ret >= 0)
+		return ret;
 
-	filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-	filp->private_data = group;
-
-	fd_install(fd, filp);
-
-	return fd;
-
-Enfile:
-	ret = -ENFILE;
-	path_put(&path);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
-	put_unused_fd(fd);
 	return ret;
 }
 
@@ -783,20 +760,6 @@
 	return ret;
 }
 
-static int
-inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	return get_sb_pseudo(fs_type, "inotify", NULL,
-			INOTIFYFS_SUPER_MAGIC, mnt);
-}
-
-static struct file_system_type inotify_fs_type = {
-    .name	= "inotifyfs",
-    .get_sb	= inotify_get_sb,
-    .kill_sb	= kill_anon_super,
-};
-
 /*
  * inotify_user_setup - Our initialization function.  Note that we cannnot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
@@ -804,16 +767,6 @@
  */
 static int __init inotify_user_setup(void)
 {
-	int ret;
-
-	ret = register_filesystem(&inotify_fs_type);
-	if (unlikely(ret))
-		panic("inotify: register_filesystem returned %d!\n", ret);
-
-	inotify_mnt = kern_mount(&inotify_fs_type);
-	if (IS_ERR(inotify_mnt))
-		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
-
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 5a9e344..9173e82 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1545,7 +1545,7 @@
  		write_inode_now(bmp_vi, !datasync);
 		iput(bmp_vi);
 	}
-	ret = ntfs_write_inode(vi, 1);
+	ret = __ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
 	err = sync_blockdev(vi->i_sb->s_bdev);
 	if (unlikely(err && !ret))
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 43179dd..b681c71 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2182,7 +2182,7 @@
 	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
 	BUG_ON(S_ISDIR(vi->i_mode));
 	if (!datasync || !NInoNonResident(NTFS_I(vi)))
-		ret = ntfs_write_inode(vi, 1);
+		ret = __ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
 	/*
 	 * NOTE: If we were to use mapping->private_list (see ext2 and
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc2505a..4b57fb1 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2957,7 +2957,7 @@
  *
  * Return 0 on success and -errno on error.
  */
-int ntfs_write_inode(struct inode *vi, int sync)
+int __ntfs_write_inode(struct inode *vi, int sync)
 {
 	sle64 nt;
 	ntfs_inode *ni = NTFS_I(vi);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 117eaf8..9a11354 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -307,12 +307,12 @@
 
 extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
 
-extern int ntfs_write_inode(struct inode *vi, int sync);
+extern int __ntfs_write_inode(struct inode *vi, int sync);
 
 static inline void ntfs_commit_inode(struct inode *vi)
 {
 	if (!is_bad_inode(vi))
-		ntfs_write_inode(vi, 1);
+		__ntfs_write_inode(vi, 1);
 	return;
 }
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 80b0477..1cf39df 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -39,6 +39,7 @@
 #include "dir.h"
 #include "debug.h"
 #include "index.h"
+#include "inode.h"
 #include "aops.h"
 #include "layout.h"
 #include "malloc.h"
@@ -2662,6 +2663,13 @@
 	return 0;
 }
 
+#ifdef NTFS_RW
+static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc)
+{
+	return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL);
+}
+#endif
+
 /**
  * The complete super operations.
  */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 2bbe1ecc..9f8bd91 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5713,7 +5713,7 @@
 		goto out;
 	}
 
-	vfs_dq_free_space_nodirty(inode,
+	dquot_free_space_nodirty(inode,
 				  ocfs2_clusters_to_bytes(inode->i_sb, len));
 
 	ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
@@ -6936,7 +6936,7 @@
 		goto bail;
 	}
 
-	vfs_dq_free_space_nodirty(inode,
+	dquot_free_space_nodirty(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
@@ -7301,11 +7301,10 @@
 		unsigned int page_end;
 		u64 phys;
 
-		if (vfs_dq_alloc_space_nodirty(inode,
-				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
-			ret = -EDQUOT;
+		ret = dquot_alloc_space_nodirty(inode,
+				       ocfs2_clusters_to_bytes(osb->sb, 1));
+		if (ret)
 			goto out_commit;
-		}
 		did_quota = 1;
 
 		ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
@@ -7381,7 +7380,7 @@
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(inode,
+		dquot_free_space_nodirty(inode,
 					  ocfs2_clusters_to_bytes(osb->sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4c2a6d2..21441dd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1764,10 +1764,11 @@
 
 	wc->w_handle = handle;
 
-	if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
-			ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
-		ret = -EDQUOT;
-		goto out_commit;
+	if (clusters_to_alloc) {
+		ret = dquot_alloc_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
+		if (ret)
+			goto out_commit;
 	}
 	/*
 	 * We don't want this to fail in ocfs2_write_end(), so do it
@@ -1810,7 +1811,7 @@
 	return 0;
 out_quota:
 	if (clusters_to_alloc)
-		vfs_dq_free_space(inode,
+		dquot_free_space(inode,
 			  ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
 out_commit:
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 765d66c..efd77d0 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2964,12 +2964,10 @@
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				ocfs2_clusters_to_bytes(osb->sb,
-							alloc + dx_alloc))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+		ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
@@ -3178,7 +3176,7 @@
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir, bytes_allocated);
+		dquot_free_space_nodirty(dir, bytes_allocated);
 
 	ocfs2_commit_trans(osb, handle);
 
@@ -3221,11 +3219,10 @@
 	if (extend) {
 		u32 offset = OCFS2_I(dir)->ip_clusters;
 
-		if (vfs_dq_alloc_space_nodirty(dir,
-					ocfs2_clusters_to_bytes(sb, 1))) {
-			status = -EDQUOT;
+		status = dquot_alloc_space_nodirty(dir,
+					ocfs2_clusters_to_bytes(sb, 1));
+		if (status)
 			goto bail;
-		}
 		did_quota = 1;
 
 		status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
@@ -3254,7 +3251,7 @@
 	status = 0;
 bail:
 	if (did_quota && status < 0)
-		vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
+		dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
 	mlog_exit(status);
 	return status;
 }
@@ -3889,11 +3886,10 @@
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				       ocfs2_clusters_to_bytes(dir->i_sb, 1))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+				       ocfs2_clusters_to_bytes(dir->i_sb, 1));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
@@ -3983,7 +3979,7 @@
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir,
+		dquot_free_space_nodirty(dir,
 				ocfs2_clusters_to_bytes(dir->i_sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
@@ -4165,11 +4161,10 @@
 		goto out;
 	}
 
-	if (vfs_dq_alloc_space_nodirty(dir,
-				       ocfs2_clusters_to_bytes(osb->sb, 1))) {
-		ret = -EDQUOT;
+	ret = dquot_alloc_space_nodirty(dir,
+				       ocfs2_clusters_to_bytes(osb->sb, 1));
+	if (ret)
 		goto out_commit;
-	}
 	did_quota = 1;
 
 	/*
@@ -4229,7 +4224,7 @@
 
 out_commit:
 	if (ret < 0 && did_quota)
-		vfs_dq_free_space_nodirty(dir,
+		dquot_free_space_nodirty(dir,
 					  ocfs2_clusters_to_bytes(dir->i_sb, 1));
 
 	ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5b52547..17947dc 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -107,6 +107,9 @@
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
 		   file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
 
+	if (file->f_mode & FMODE_WRITE)
+		dquot_initialize(inode);
+
 	spin_lock(&oi->ip_lock);
 
 	/* Check that the inode hasn't been wiped from disk by another
@@ -629,11 +632,10 @@
 	}
 
 restarted_transaction:
-	if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
-	    clusters_to_add))) {
-		status = -EDQUOT;
+	status = dquot_alloc_space_nodirty(inode,
+			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
+	if (status)
 		goto leave;
-	}
 	did_quota = 1;
 
 	/* reserve a write to the file entry early on - that we if we
@@ -674,7 +676,7 @@
 	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 	/* Release unused quota reservation */
-	vfs_dq_free_space(inode,
+	dquot_free_space(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	did_quota = 0;
 
@@ -710,7 +712,7 @@
 
 leave:
 	if (status < 0 && did_quota)
-		vfs_dq_free_space(inode,
+		dquot_free_space(inode,
 			ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
 	if (handle) {
 		ocfs2_commit_trans(osb, handle);
@@ -978,6 +980,8 @@
 
 	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
 	if (size_change) {
+		dquot_initialize(inode);
+
 		status = ocfs2_rw_lock(inode, 1);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1020,7 +1024,7 @@
 		/*
 		 * Gather pointers to quota structures so that allocation /
 		 * freeing of quota structures happens here and not inside
-		 * vfs_dq_transfer() where we have problems with lock ordering
+		 * dquot_transfer() where we have problems with lock ordering
 		 */
 		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
 		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1053,7 +1057,7 @@
 			mlog_errno(status);
 			goto bail_unlock;
 		}
-		status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+		status = dquot_transfer(inode, attr);
 		if (status < 0)
 			goto bail_commit;
 	} else {
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 88459bd..278a223 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -665,7 +665,7 @@
 	}
 
 	ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
 	status = ocfs2_free_dinode(handle, inode_alloc_inode,
 				   inode_alloc_bh, di);
@@ -971,6 +971,8 @@
 		goto bail;
 	}
 
+	dquot_initialize(inode);
+
 	if (!ocfs2_inode_is_valid_to_delete(inode)) {
 		/* It's probably not necessary to truncate_inode_pages
 		 * here but we do it for safety anyway (it will most
@@ -1087,6 +1089,8 @@
 	mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL,
 			"Inode=%lu\n", inode->i_ino);
 
+	dquot_drop(inode);
+
 	/* To preven remote deletes we hold open lock before, now it
 	 * is time to unlock PR and EX open locks. */
 	ocfs2_open_unlock(inode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 50fb26a..d9cd4e3 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -212,7 +212,7 @@
 	} else
 		inode->i_gid = current_fsgid();
 	inode->i_mode = mode;
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	return inode;
 }
 
@@ -244,6 +244,8 @@
 		   (unsigned long)dev, dentry->d_name.len,
 		   dentry->d_name.name);
 
+	dquot_initialize(dir);
+
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
@@ -348,13 +350,9 @@
 		goto leave;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto leave;
-	}
 	did_quota_inode = 1;
 
 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
@@ -431,7 +429,7 @@
 	status = 0;
 leave:
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
@@ -636,6 +634,8 @@
 	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
+	dquot_initialize(dir);
+
 	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
 	if (err < 0) {
 		if (err != -ENOENT)
@@ -791,6 +791,8 @@
 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
 		   dentry->d_name.len, dentry->d_name.name);
 
+	dquot_initialize(dir);
+
 	BUG_ON(dentry->d_parent->d_inode != dir);
 
 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1051,6 +1053,9 @@
 		   old_dentry->d_name.len, old_dentry->d_name.name,
 		   new_dentry->d_name.len, new_dentry->d_name.name);
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	osb = OCFS2_SB(old_dir->i_sb);
 
 	if (new_inode) {
@@ -1599,6 +1604,8 @@
 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
 
+	dquot_initialize(dir);
+
 	sb = dir->i_sb;
 	osb = OCFS2_SB(sb);
 
@@ -1688,13 +1695,9 @@
 		goto bail;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto bail;
-	}
 	did_quota_inode = 1;
 
 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
@@ -1716,11 +1719,10 @@
 		u32 offset = 0;
 
 		inode->i_op = &ocfs2_symlink_inode_operations;
-		if (vfs_dq_alloc_space_nodirty(inode,
-		    ocfs2_clusters_to_bytes(osb->sb, 1))) {
-			status = -EDQUOT;
+		status = dquot_alloc_space_nodirty(inode,
+		    ocfs2_clusters_to_bytes(osb->sb, 1));
+		if (status)
 			goto bail;
-		}
 		did_quota = 1;
 		status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
 					      new_fe_bh,
@@ -1788,10 +1790,10 @@
 	d_instantiate(dentry, inode);
 bail:
 	if (status < 0 && did_quota)
-		vfs_dq_free_space_nodirty(inode,
+		dquot_free_space_nodirty(inode,
 					ocfs2_clusters_to_bytes(osb->sb, 1));
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
@@ -2099,13 +2101,9 @@
 		goto leave;
 	}
 
-	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
-	 * to be called. */
-	if (sb_any_quota_active(osb->sb) &&
-	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-		status = -EDQUOT;
+	status = dquot_alloc_inode(inode);
+	if (status)
 		goto leave;
-	}
 	did_quota_inode = 1;
 
 	inode->i_nlink = 0;
@@ -2140,7 +2138,7 @@
 	insert_inode_hash(inode);
 leave:
 	if (status < 0 && did_quota_inode)
-		vfs_dq_free_inode(inode);
+		dquot_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);
 
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index b437dc0..355f41d 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -851,13 +851,6 @@
 }
 
 const struct dquot_operations ocfs2_quota_operations = {
-	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
-	.free_inode	= dquot_free_inode,
-	.transfer	= dquot_transfer,
 	.write_dquot	= ocfs2_write_dquot,
 	.acquire_dquot	= ocfs2_acquire_dquot,
 	.release_dquot	= ocfs2_release_dquot,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index fb6aa7a..9e96921 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4390,7 +4390,7 @@
 	}
 
 	mutex_lock(&inode->i_mutex);
-	vfs_dq_init(dir);
+	dquot_initialize(dir);
 	error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
 	mutex_unlock(&inode->i_mutex);
 	if (!error)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index f3b7c15..75d9b5b 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -11,6 +11,7 @@
 #include <linux/parser.h>
 #include <linux/buffer_head.h>
 #include <linux/vmalloc.h>
+#include <linux/writeback.h>
 #include <linux/crc-itu-t.h>
 #include "omfs.h"
 
@@ -89,7 +90,7 @@
 	oi->i_head.h_check_xor = xor;
 }
 
-static int omfs_write_inode(struct inode *inode, int wait)
+static int __omfs_write_inode(struct inode *inode, int wait)
 {
 	struct omfs_inode *oi;
 	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
@@ -162,9 +163,14 @@
 	return ret;
 }
 
+static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int omfs_sync_inode(struct inode *inode)
 {
-	return omfs_write_inode(inode, 1);
+	return __omfs_write_inode(inode, 1);
 }
 
 /*
diff --git a/fs/open.c b/fs/open.c
index 040cef7..e17f544 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -8,7 +8,6 @@
 #include <linux/mm.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
-#include <linux/quotaops.h>
 #include <linux/fsnotify.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -271,17 +270,15 @@
 	 * Make sure that there are no leases.  get_write_access() protects
 	 * against the truncate racing with a lease-granting setlease().
 	 */
-	error = break_lease(inode, FMODE_WRITE);
+	error = break_lease(inode, O_WRONLY);
 	if (error)
 		goto put_write_and_out;
 
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
 		error = security_path_truncate(&path, length, 0);
-	if (!error) {
-		vfs_dq_init(inode);
+	if (!error)
 		error = do_truncate(path.dentry, length, 0, NULL);
-	}
 
 put_write_and_out:
 	put_write_access(inode);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8d5f392..5cc564a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -86,7 +86,7 @@
 
 	/*
 	 * slave 'mnt' to a peer mount that has the
-	 * same root dentry. If none is available than
+	 * same root dentry. If none is available then
 	 * slave it to anything that is available.
 	 */
 	while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
@@ -147,6 +147,11 @@
  * get the next mount in the propagation tree.
  * @m: the mount seen last
  * @origin: the original mount from where the tree walk initiated
+ *
+ * Note that peer groups form contiguous segments of slave lists.
+ * We rely on that in get_source() to be able to find out if
+ * vfsmount found while iterating with propagation_next() is
+ * a peer of one we'd found earlier.
  */
 static struct vfsmount *propagation_next(struct vfsmount *m,
 					 struct vfsmount *origin)
@@ -186,10 +191,6 @@
 {
 	struct vfsmount *p_last_src = NULL;
 	struct vfsmount *p_last_dest = NULL;
-	*type = CL_PROPAGATION;
-
-	if (IS_MNT_SHARED(dest))
-		*type |= CL_MAKE_SHARED;
 
 	while (last_dest != dest->mnt_master) {
 		p_last_dest = last_dest;
@@ -202,13 +203,18 @@
 		do {
 			p_last_dest = next_peer(p_last_dest);
 		} while (IS_MNT_NEW(p_last_dest));
+		/* is that a peer of the earlier? */
+		if (dest == p_last_dest) {
+			*type = CL_MAKE_SHARED;
+			return p_last_src;
+		}
 	}
-
-	if (dest != p_last_dest) {
-		*type |= CL_SLAVE;
-		return last_src;
-	} else
-		return p_last_src;
+	/* slave of the earlier, then */
+	*type = CL_SLAVE;
+	/* beginning of peer group among the slaves? */
+	if (IS_MNT_SHARED(dest))
+		*type |= CL_MAKE_SHARED;
+	return last_src;
 }
 
 /*
diff --git a/fs/pnode.h b/fs/pnode.h
index 958665d..1ea4ae1 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -21,12 +21,11 @@
 #define CL_SLAVE     		0x02
 #define CL_COPY_ALL 		0x04
 #define CL_MAKE_SHARED 		0x08
-#define CL_PROPAGATION 		0x10
-#define CL_PRIVATE 		0x20
+#define CL_PRIVATE 		0x10
 
 static inline void set_mnt_shared(struct vfsmount *mnt)
 {
-	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+	mnt->mnt_flags &= ~MNT_SHARED_MASK;
 	mnt->mnt_flags |= MNT_SHARED;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 623e2ff..a731084 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -647,17 +647,11 @@
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
 	struct proc_mounts *p = file->private_data;
-	struct mnt_namespace *ns = p->ns;
 	unsigned res = POLLIN | POLLRDNORM;
 
-	poll_wait(file, &ns->poll, wait);
-
-	spin_lock(&vfsmount_lock);
-	if (p->event != ns->event) {
-		p->event = ns->event;
+	poll_wait(file, &p->ns->poll, wait);
+	if (mnt_had_events(p))
 		res |= POLLERR | POLLPRI;
-	}
-	spin_unlock(&vfsmount_lock);
 
 	return res;
 }
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 480cb10..9580abe 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -662,6 +662,7 @@
 	}
 	return ent;
 }
+EXPORT_SYMBOL(proc_symlink);
 
 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
 		struct proc_dir_entry *parent)
@@ -700,6 +701,7 @@
 {
 	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
 }
+EXPORT_SYMBOL(proc_mkdir);
 
 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 					 struct proc_dir_entry *parent)
@@ -728,6 +730,7 @@
 	}
 	return ent;
 }
+EXPORT_SYMBOL(create_proc_entry);
 
 struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
 					struct proc_dir_entry *parent,
@@ -762,6 +765,7 @@
 out:
 	return NULL;
 }
+EXPORT_SYMBOL(proc_create_data);
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
@@ -853,3 +857,4 @@
 			de->parent->name, de->name, de->subdir->name);
 	pde_put(de);
 }
+EXPORT_SYMBOL(remove_proc_entry);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index b080b79..757c069 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -220,9 +220,3 @@
 {
 	mntput(ns->proc_mnt);
 }
-
-EXPORT_SYMBOL(proc_symlink);
-EXPORT_SYMBOL(proc_mkdir);
-EXPORT_SYMBOL(create_proc_entry);
-EXPORT_SYMBOL(proc_create_data);
-EXPORT_SYMBOL(remove_proc_entry);
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index efc02eb..dad7fb2 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -59,3 +59,8 @@
 	bool
 	depends on XFS_QUOTA || QUOTA
 	default y
+
+config QUOTACTL_COMPAT
+	bool
+	depends on QUOTACTL && COMPAT_FOR_U64_ALIGNMENT
+	default y
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 68d4f6d..5f9e9e2 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -3,3 +3,5 @@
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTA_TREE)	+= quota_tree.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_QUOTACTL_COMPAT)	+= compat.o
+obj-$(CONFIG_QUOTA_NETLINK_INTERFACE)	+= netlink.o
diff --git a/fs/quota/compat.c b/fs/quota/compat.c
new file mode 100644
index 0000000..fb1892f
--- /dev/null
+++ b/fs/quota/compat.c
@@ -0,0 +1,118 @@
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/quotaops.h>
+
+/*
+ * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
+ * and is necessary due to alignment problems.
+ */
+struct compat_if_dqblk {
+	compat_u64 dqb_bhardlimit;
+	compat_u64 dqb_bsoftlimit;
+	compat_u64 dqb_curspace;
+	compat_u64 dqb_ihardlimit;
+	compat_u64 dqb_isoftlimit;
+	compat_u64 dqb_curinodes;
+	compat_u64 dqb_btime;
+	compat_u64 dqb_itime;
+	compat_uint_t dqb_valid;
+};
+
+/* XFS structures */
+struct compat_fs_qfilestat {
+	compat_u64 dqb_bhardlimit;
+	compat_u64 qfs_nblks;
+	compat_uint_t qfs_nextents;
+};
+
+struct compat_fs_quota_stat {
+	__s8		qs_version;
+	__u16		qs_flags;
+	__s8		qs_pad;
+	struct compat_fs_qfilestat	qs_uquota;
+	struct compat_fs_qfilestat	qs_gquota;
+	compat_uint_t	qs_incoredqs;
+	compat_int_t	qs_btimelimit;
+	compat_int_t	qs_itimelimit;
+	compat_int_t	qs_rtbtimelimit;
+	__u16		qs_bwarnlimit;
+	__u16		qs_iwarnlimit;
+};
+
+asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+						qid_t id, void __user *addr)
+{
+	unsigned int cmds;
+	struct if_dqblk __user *dqblk;
+	struct compat_if_dqblk __user *compat_dqblk;
+	struct fs_quota_stat __user *fsqstat;
+	struct compat_fs_quota_stat __user *compat_fsqstat;
+	compat_uint_t data;
+	u16 xdata;
+	long ret;
+
+	cmds = cmd >> SUBCMDSHIFT;
+
+	switch (cmds) {
+	case Q_GETQUOTA:
+		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
+		compat_dqblk = addr;
+		ret = sys_quotactl(cmd, special, id, dqblk);
+		if (ret)
+			break;
+		if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
+			get_user(data, &dqblk->dqb_valid) ||
+			put_user(data, &compat_dqblk->dqb_valid))
+			ret = -EFAULT;
+		break;
+	case Q_SETQUOTA:
+		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
+		compat_dqblk = addr;
+		ret = -EFAULT;
+		if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
+			get_user(data, &compat_dqblk->dqb_valid) ||
+			put_user(data, &dqblk->dqb_valid))
+			break;
+		ret = sys_quotactl(cmd, special, id, dqblk);
+		break;
+	case Q_XGETQSTAT:
+		fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
+		compat_fsqstat = addr;
+		ret = sys_quotactl(cmd, special, id, fsqstat);
+		if (ret)
+			break;
+		ret = -EFAULT;
+		/* Copying qs_version, qs_flags, qs_pad */
+		if (copy_in_user(compat_fsqstat, fsqstat,
+			offsetof(struct compat_fs_quota_stat, qs_uquota)))
+			break;
+		/* Copying qs_uquota */
+		if (copy_in_user(&compat_fsqstat->qs_uquota,
+			&fsqstat->qs_uquota,
+			sizeof(compat_fsqstat->qs_uquota)) ||
+			get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
+			put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
+			break;
+		/* Copying qs_gquota */
+		if (copy_in_user(&compat_fsqstat->qs_gquota,
+			&fsqstat->qs_gquota,
+			sizeof(compat_fsqstat->qs_gquota)) ||
+			get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
+			put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
+			break;
+		/* Copying the rest */
+		if (copy_in_user(&compat_fsqstat->qs_incoredqs,
+			&fsqstat->qs_incoredqs,
+			sizeof(struct compat_fs_quota_stat) -
+			offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
+			get_user(xdata, &fsqstat->qs_iwarnlimit) ||
+			put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
+			break;
+		ret = 0;
+		break;
+	default:
+		ret = sys_quotactl(cmd, special, id, addr);
+	}
+	return ret;
+}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3fc62b0..e0b870f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -100,9 +100,13 @@
  *
  * Any operation working on dquots via inode pointers must hold dqptr_sem.  If
  * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock
- * (these locking rules also apply for S_NOQUOTA flag in the inode - note that
- * for altering the flag i_mutex is also needed).
+ * read lock is enough. If pointers are altered function must hold write lock.
+ * Special care needs to be taken about S_NOQUOTA inode flag (marking that
+ * inode is a quota file). Functions adding pointers from inode to dquots have
+ * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
+ * then drops all pointers to dquots from an inode.
  *
  * Each dquot has its dq_lock mutex. Locked dquots might not be referenced
  * from inodes (dquot_alloc_space() and such don't check the dq_lock).
@@ -225,6 +229,9 @@
 struct dqstats dqstats;
 EXPORT_SYMBOL(dqstats);
 
+static qsize_t inode_get_rsv_space(struct inode *inode);
+static void __dquot_initialize(struct inode *inode, int type);
+
 static inline unsigned int
 hashfn(const struct super_block *sb, unsigned int id, int type)
 {
@@ -564,7 +571,7 @@
 }
 EXPORT_SYMBOL(dquot_scan_active);
 
-int vfs_quota_sync(struct super_block *sb, int type)
+int vfs_quota_sync(struct super_block *sb, int type, int wait)
 {
 	struct list_head *dirty;
 	struct dquot *dquot;
@@ -609,6 +616,33 @@
 	spin_unlock(&dq_list_lock);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
+	if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
+		return 0;
+
+	/* This is not very clever (and fast) but currently I don't know about
+	 * any other simple way of getting quota data to disk and we must get
+	 * them there for userspace to be visible... */
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, 1);
+	sync_blockdev(sb->s_bdev);
+
+	/*
+	 * Now when everything is written we can discard the pagecache so
+	 * that userspace sees the changes.
+	 */
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (type != -1 && cnt != type)
+			continue;
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
+				  I_MUTEX_QUOTA);
+		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
+		mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
+	}
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+
 	return 0;
 }
 EXPORT_SYMBOL(vfs_quota_sync);
@@ -840,11 +874,14 @@
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
+	int reserved = 0;
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
 			continue;
+		if (unlikely(inode_get_rsv_space(inode) > 0))
+			reserved = 1;
 		if (!atomic_read(&inode->i_writecount))
 			continue;
 		if (!dqinit_needed(inode, type))
@@ -854,7 +891,7 @@
 		spin_unlock(&inode_lock);
 
 		iput(old_inode);
-		sb->dq_op->initialize(inode, type);
+		__dquot_initialize(inode, type);
 		/* We hold a reference to 'inode' so it couldn't have been
 		 * removed from s_inodes list while we dropped the inode_lock.
 		 * We cannot iput the inode now as we can be holding the last
@@ -865,6 +902,12 @@
 	}
 	spin_unlock(&inode_lock);
 	iput(old_inode);
+
+	if (reserved) {
+		printk(KERN_WARNING "VFS (%s): Writes happened before quota"
+			" was turned on thus quota information is probably "
+			"inconsistent. Please run quotacheck(8).\n", sb->s_id);
+	}
 }
 
 /*
@@ -978,10 +1021,12 @@
 /*
  * Claim reserved quota space
  */
-static void dquot_claim_reserved_space(struct dquot *dquot,
-						qsize_t number)
+static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
 {
-	WARN_ON(dquot->dq_dqb.dqb_rsvspace < number);
+	if (dquot->dq_dqb.dqb_rsvspace < number) {
+		WARN_ON_ONCE(1);
+		number = dquot->dq_dqb.dqb_rsvspace;
+	}
 	dquot->dq_dqb.dqb_curspace += number;
 	dquot->dq_dqb.dqb_rsvspace -= number;
 }
@@ -989,7 +1034,12 @@
 static inline
 void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
 {
-	dquot->dq_dqb.dqb_rsvspace -= number;
+	if (dquot->dq_dqb.dqb_rsvspace >= number)
+		dquot->dq_dqb.dqb_rsvspace -= number;
+	else {
+		WARN_ON_ONCE(1);
+		dquot->dq_dqb.dqb_rsvspace = 0;
+	}
 }
 
 static void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
@@ -1131,13 +1181,13 @@
 	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
-		return QUOTA_OK;
+		return 0;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
 	    newinodes > dquot->dq_dqb.dqb_ihardlimit &&
             !ignore_hardlimit(dquot)) {
 		*warntype = QUOTA_NL_IHARDWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1146,7 +1196,7 @@
 	    get_seconds() >= dquot->dq_dqb.dqb_itime &&
             !ignore_hardlimit(dquot)) {
 		*warntype = QUOTA_NL_ISOFTLONGWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
@@ -1157,7 +1207,7 @@
 		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
 	}
 
-	return QUOTA_OK;
+	return 0;
 }
 
 /* needs dq_data_lock */
@@ -1169,7 +1219,7 @@
 	*warntype = QUOTA_NL_NOWARN;
 	if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
-		return QUOTA_OK;
+		return 0;
 
 	tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
 		+ space;
@@ -1179,7 +1229,7 @@
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BHARDWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1189,7 +1239,7 @@
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BSOFTLONGWARN;
-		return NO_QUOTA;
+		return -EDQUOT;
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
@@ -1205,10 +1255,10 @@
 			 * We don't allow preallocation to exceed softlimit so exceeding will
 			 * be always printed
 			 */
-			return NO_QUOTA;
+			return -EDQUOT;
 	}
 
-	return QUOTA_OK;
+	return 0;
 }
 
 static int info_idq_free(struct dquot *dquot, qsize_t inodes)
@@ -1242,25 +1292,32 @@
 		return QUOTA_NL_BHARDBELOW;
 	return QUOTA_NL_NOWARN;
 }
+
 /*
- *	Initialize quota pointers in inode
- *	We do things in a bit complicated way but by that we avoid calling
- *	dqget() and thus filesystem callbacks under dqptr_sem.
+ * Initialize quota pointers in inode
+ *
+ * We do things in a bit complicated way but by that we avoid calling
+ * dqget() and thus filesystem callbacks under dqptr_sem.
+ *
+ * It is better to call this function outside of any transaction as it
+ * might need a lot of space in journal for dquot structure allocation.
  */
-int dquot_initialize(struct inode *inode, int type)
+static void __dquot_initialize(struct inode *inode, int type)
 {
 	unsigned int id = 0;
-	int cnt, ret = 0;
-	struct dquot *got[MAXQUOTAS] = { NULL, NULL };
+	int cnt;
+	struct dquot *got[MAXQUOTAS];
 	struct super_block *sb = inode->i_sb;
+	qsize_t rsv;
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return 0;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return;
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		got[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
 		switch (cnt) {
@@ -1275,7 +1332,6 @@
 	}
 
 	down_write(&sb_dqopt(sb)->dqptr_sem);
-	/* Having dqptr_sem we know NOQUOTA flags can't be altered... */
 	if (IS_NOQUOTA(inode))
 		goto out_err;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1287,20 +1343,31 @@
 		if (!inode->i_dquot[cnt]) {
 			inode->i_dquot[cnt] = got[cnt];
 			got[cnt] = NULL;
+			/*
+			 * Make quota reservation system happy if someone
+			 * did a write before quota was turned on
+			 */
+			rsv = inode_get_rsv_space(inode);
+			if (unlikely(rsv))
+				dquot_resv_space(inode->i_dquot[cnt], rsv);
 		}
 	}
 out_err:
 	up_write(&sb_dqopt(sb)->dqptr_sem);
 	/* Drop unused references */
 	dqput_all(got);
-	return ret;
+}
+
+void dquot_initialize(struct inode *inode)
+{
+	__dquot_initialize(inode, -1);
 }
 EXPORT_SYMBOL(dquot_initialize);
 
 /*
  * 	Release all quotas referenced by inode
  */
-int dquot_drop(struct inode *inode)
+static void __dquot_drop(struct inode *inode)
 {
 	int cnt;
 	struct dquot *put[MAXQUOTAS];
@@ -1312,33 +1379,32 @@
 	}
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	dqput_all(put);
-	return 0;
+}
+
+void dquot_drop(struct inode *inode)
+{
+	int cnt;
+
+	if (IS_NOQUOTA(inode))
+		return;
+
+	/*
+	 * Test before calling to rule out calls from proc and such
+	 * where we are not allowed to block. Note that this is
+	 * actually reliable test even without the lock - the caller
+	 * must assure that nobody can come after the DQUOT_DROP and
+	 * add quota pointers back anyway.
+	 */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (inode->i_dquot[cnt])
+			break;
+	}
+
+	if (cnt < MAXQUOTAS)
+		__dquot_drop(inode);
 }
 EXPORT_SYMBOL(dquot_drop);
 
-/* Wrapper to remove references to quota structures from inode */
-void vfs_dq_drop(struct inode *inode)
-{
-	/* Here we can get arbitrary inode from clear_inode() so we have
-	 * to be careful. OTOH we don't need locking as quota operations
-	 * are allowed to change only at mount time */
-	if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
-	    && inode->i_sb->dq_op->drop) {
-		int cnt;
-		/* Test before calling to rule out calls from proc and such
-                 * where we are not allowed to block. Note that this is
-		 * actually reliable test even without the lock - the caller
-		 * must assure that nobody can come after the DQUOT_DROP and
-		 * add quota pointers back anyway */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-			if (inode->i_dquot[cnt])
-				break;
-		if (cnt < MAXQUOTAS)
-			inode->i_sb->dq_op->drop(inode);
-	}
-}
-EXPORT_SYMBOL(vfs_dq_drop);
-
 /*
  * inode_reserved_space is managed internally by quota, and protected by
  * i_lock similar to i_blocks+i_bytes.
@@ -1351,28 +1417,30 @@
 	return inode->i_sb->dq_op->get_reserved_space(inode);
 }
 
-static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+void inode_add_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) += number;
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_add_rsv_space);
 
-
-static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+void inode_claim_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) -= number;
 	__inode_add_bytes(inode, number);
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_claim_rsv_space);
 
-static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+void inode_sub_rsv_space(struct inode *inode, qsize_t number)
 {
 	spin_lock(&inode->i_lock);
 	*inode_reserved_space(inode) -= number;
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL(inode_sub_rsv_space);
 
 static qsize_t inode_get_rsv_space(struct inode *inode)
 {
@@ -1404,38 +1472,34 @@
 }
 
 /*
- * Following four functions update i_blocks+i_bytes fields and
- * quota information (together with appropriate checks)
- * NOTE: We absolutely rely on the fact that caller dirties
- * the inode (usually macros in quotaops.h care about this) and
- * holds a handle for the current transaction so that dquot write and
- * inode write go into the same transaction.
+ * This functions updates i_blocks+i_bytes fields and quota information
+ * (together with appropriate checks).
+ *
+ * NOTE: We absolutely rely on the fact that caller dirties the inode
+ * (usually helpers in quotaops.h care about this) and holds a handle for
+ * the current transaction so that dquot write and inode write go into the
+ * same transaction.
  */
 
 /*
  * This operation can block, but only after everything is updated
  */
 int __dquot_alloc_space(struct inode *inode, qsize_t number,
-			int warn, int reserve)
+		int warn, int reserve)
 {
-	int cnt, ret = QUOTA_OK;
+	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
 	 * re-enter the quota code and are already holding the mutex
 	 */
-	if (IS_NOQUOTA(inode)) {
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_incr_space(inode, number, reserve);
 		goto out;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {
-		inode_incr_space(inode, number, reserve);
-		goto out_unlock;
-	}
-
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 
@@ -1443,9 +1507,9 @@
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
-		    == NO_QUOTA) {
-			ret = NO_QUOTA;
+		ret = check_bdq(inode->i_dquot[cnt], number, !warn,
+				warntype+cnt);
+		if (ret) {
 			spin_unlock(&dq_data_lock);
 			goto out_flush_warn;
 		}
@@ -1466,61 +1530,45 @@
 	mark_all_dquot_dirty(inode->i_dquot);
 out_flush_warn:
 	flush_warnings(inode->i_dquot, warntype);
-out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
 	return ret;
 }
-
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
-{
-	return __dquot_alloc_space(inode, number, warn, 0);
-}
-EXPORT_SYMBOL(dquot_alloc_space);
-
-int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
-{
-	return __dquot_alloc_space(inode, number, warn, 1);
-}
-EXPORT_SYMBOL(dquot_reserve_space);
+EXPORT_SYMBOL(__dquot_alloc_space);
 
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode, qsize_t number)
+int dquot_alloc_inode(const struct inode *inode)
 {
-	int cnt, ret = NO_QUOTA;
+	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode)) {
-		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		return QUOTA_OK;
-	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt)
-		    == NO_QUOTA)
+		ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
+		if (ret)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		dquot_incr_inodes(inode->i_dquot[cnt], number);
+		dquot_incr_inodes(inode->i_dquot[cnt], 1);
 	}
-	ret = QUOTA_OK;
+
 warn_put_all:
 	spin_unlock(&dq_data_lock);
-	if (ret == QUOTA_OK)
+	if (ret == 0)
 		mark_all_dquot_dirty(inode->i_dquot);
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1528,23 +1576,19 @@
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
 
-int dquot_claim_space(struct inode *inode, qsize_t number)
+/*
+ * Convert in-memory reserved quotas to real consumed quotas
+ */
+int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
 	int cnt;
-	int ret = QUOTA_OK;
 
-	if (IS_NOQUOTA(inode)) {
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_claim_rsv_space(inode, number);
-		goto out;
+		return 0;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	if (IS_NOQUOTA(inode))	{
-		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		inode_claim_rsv_space(inode, number);
-		goto out;
-	}
-
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1557,33 +1601,26 @@
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
-	return ret;
+	return 0;
 }
-EXPORT_SYMBOL(dquot_claim_space);
+EXPORT_SYMBOL(dquot_claim_space_nodirty);
 
 /*
  * This operation can block, but only after everything is updated
  */
-int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+void __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode)) {
-out_sub:
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) {
 		inode_decr_space(inode, number, reserve);
-		return QUOTA_OK;
+		return;
 	}
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	/* Now recheck reliably when holding dqptr_sem */
-	if (IS_NOQUOTA(inode)) {
-		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		goto out_sub;
-	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
@@ -1603,56 +1640,34 @@
 out_unlock:
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	return QUOTA_OK;
 }
-
-int dquot_free_space(struct inode *inode, qsize_t number)
-{
-	return  __dquot_free_space(inode, number, 0);
-}
-EXPORT_SYMBOL(dquot_free_space);
-
-/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
-	__dquot_free_space(inode, number, 1);
-
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
+EXPORT_SYMBOL(__dquot_free_space);
 
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_inode(const struct inode *inode, qsize_t number)
+void dquot_free_inode(const struct inode *inode)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
-	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+	if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode))
+		return;
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	/* Now recheck reliably when holding dqptr_sem */
-	if (IS_NOQUOTA(inode)) {
-		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		return QUOTA_OK;
-	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
-		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
-		dquot_decr_inodes(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
+		dquot_decr_inodes(inode->i_dquot[cnt], 1);
 	}
 	spin_unlock(&dq_data_lock);
 	mark_all_dquot_dirty(inode->i_dquot);
 	flush_warnings(inode->i_dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	return QUOTA_OK;
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
@@ -1662,37 +1677,31 @@
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
  */
-int dquot_transfer(struct inode *inode, struct iattr *iattr)
+static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
 {
 	qsize_t space, cur_space;
 	qsize_t rsv_space = 0;
 	struct dquot *transfer_from[MAXQUOTAS];
 	struct dquot *transfer_to[MAXQUOTAS];
-	int cnt, ret = QUOTA_OK;
-	int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid,
-	    chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid;
+	int cnt, ret = 0;
 	char warntype_to[MAXQUOTAS];
 	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
-		return QUOTA_OK;
+		return 0;
 	/* Initialize the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		transfer_from[cnt] = NULL;
 		transfer_to[cnt] = NULL;
 		warntype_to[cnt] = QUOTA_NL_NOWARN;
 	}
-	if (chuid)
-		transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid,
-					      USRQUOTA);
-	if (chgid)
-		transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid,
-					      GRPQUOTA);
-
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		if (mask & (1 << cnt))
+			transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
+	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	/* Now recheck reliably when holding dqptr_sem */
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
 		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 		goto put_all;
@@ -1706,9 +1715,11 @@
 		if (!transfer_to[cnt])
 			continue;
 		transfer_from[cnt] = inode->i_dquot[cnt];
-		if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
-		    NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
-		    warntype_to + cnt) == NO_QUOTA)
+		ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
+		if (ret)
+			goto over_quota;
+		ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt);
+		if (ret)
 			goto over_quota;
 	}
 
@@ -1762,22 +1773,32 @@
 	/* Clear dquot pointers we don't want to dqput() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		transfer_from[cnt] = NULL;
-	ret = NO_QUOTA;
 	goto warn_put_all;
 }
-EXPORT_SYMBOL(dquot_transfer);
 
-/* Wrapper for transferring ownership of an inode */
-int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+/* Wrapper for transferring ownership of an inode for uid/gid only
+ * Called from FSXXX_setattr()
+ */
+int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
+	qid_t chid[MAXQUOTAS];
+	unsigned long mask = 0;
+
+	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
+		mask |= 1 << USRQUOTA;
+		chid[USRQUOTA] = iattr->ia_uid;
+	}
+	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
+		mask |= 1 << GRPQUOTA;
+		chid[GRPQUOTA] = iattr->ia_gid;
+	}
 	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
-		vfs_dq_init(inode);
-		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
-			return 1;
+		dquot_initialize(inode);
+		return __dquot_transfer(inode, chid, mask);
 	}
 	return 0;
 }
-EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(dquot_transfer);
 
 /*
  * Write info of quota file to disk
@@ -1798,13 +1819,6 @@
  * Definitions of diskquota operations.
  */
 const struct dquot_operations dquot_operations = {
-	.initialize	= dquot_initialize,
-	.drop		= dquot_drop,
-	.alloc_space	= dquot_alloc_space,
-	.alloc_inode	= dquot_alloc_inode,
-	.free_space	= dquot_free_space,
-	.free_inode	= dquot_free_inode,
-	.transfer	= dquot_transfer,
 	.write_dquot	= dquot_commit,
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
@@ -1815,6 +1829,20 @@
 };
 
 /*
+ * Generic helper for ->open on filesystems supporting disk quotas.
+ */
+int dquot_file_open(struct inode *inode, struct file *file)
+{
+	int error;
+
+	error = generic_file_open(inode, file);
+	if (!error && (file->f_mode & FMODE_WRITE))
+		dquot_initialize(inode);
+	return error;
+}
+EXPORT_SYMBOL(dquot_file_open);
+
+/*
  * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
  */
 int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
@@ -1993,11 +2021,13 @@
 	}
 
 	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
-		/* As we bypass the pagecache we must now flush the inode so
-		 * that we see all the changes from userspace... */
-		write_inode_now(inode, 1);
-		/* And now flush the block cache so that kernel sees the
-		 * changes */
+		/* As we bypass the pagecache we must now flush all the
+		 * dirty data and invalidate caches so that kernel sees
+		 * changes from userspace. It is not enough to just flush
+		 * the quota file since if blocksize < pagesize, invalidation
+		 * of the cache could fail because of other unrelated dirty
+		 * data */
+		sync_filesystem(sb);
 		invalidate_bdev(sb->s_bdev);
 	}
 	mutex_lock(&dqopt->dqonoff_mutex);
@@ -2010,14 +2040,16 @@
 		/* We don't want quota and atime on quota files (deadlocks
 		 * possible) Also nobody should write to the file - we use
 		 * special IO operations which ignore the immutable bit. */
-		down_write(&dqopt->dqptr_sem);
 		mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 		oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
 					     S_NOQUOTA);
 		inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
 		mutex_unlock(&inode->i_mutex);
-		up_write(&dqopt->dqptr_sem);
-		sb->dq_op->drop(inode);
+		/*
+		 * When S_NOQUOTA is set, remove dquot references as no more
+		 * references can be added
+		 */
+		__dquot_drop(inode);
 	}
 
 	error = -EIO;
@@ -2053,14 +2085,12 @@
 	iput(inode);
 out_lock:
 	if (oldflags != -1) {
-		down_write(&dqopt->dqptr_sem);
 		mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 		/* Set the flags back (in the case of accidental quotaon()
 		 * on a wrong file we don't want to mess up the flags) */
 		inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
 		inode->i_flags |= oldflags;
 		mutex_unlock(&inode->i_mutex);
-		up_write(&dqopt->dqptr_sem);
 	}
 	mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
new file mode 100644
index 0000000..2663ed9
--- /dev/null
+++ b/fs/quota/netlink.c
@@ -0,0 +1,95 @@
+
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/quotaops.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+/* Netlink family structure for quota */
+static struct genl_family quota_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "VFS_DQUOT",
+	.version = 1,
+	.maxattr = QUOTA_NL_A_MAX,
+};
+
+/**
+ * quota_send_warning - Send warning to userspace about exceeded quota
+ * @type: The quota type: USRQQUOTA, GRPQUOTA,...
+ * @id: The user or group id of the quota that was exceeded
+ * @dev: The device on which the fs is mounted (sb->s_dev)
+ * @warntype: The type of the warning: QUOTA_NL_...
+ *
+ * This can be used by filesystems (including those which don't use
+ * dquot) to send a message to userspace relating to quota limits.
+ *
+ */
+
+void quota_send_warning(short type, unsigned int id, dev_t dev,
+			const char warntype)
+{
+	static atomic_t seq;
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+	int msg_size = 4 * nla_total_size(sizeof(u32)) +
+		       2 * nla_total_size(sizeof(u64));
+
+	/* We have to allocate using GFP_NOFS as we are called from a
+	 * filesystem performing write and thus further recursion into
+	 * the fs to free some data could cause deadlocks. */
+	skb = genlmsg_new(msg_size, GFP_NOFS);
+	if (!skb) {
+		printk(KERN_ERR
+		  "VFS: Not enough memory to send quota warning.\n");
+		return;
+	}
+	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
+			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
+	if (!msg_head) {
+		printk(KERN_ERR
+		  "VFS: Cannot store netlink header in quota warning.\n");
+		goto err_out;
+	}
+	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
+	if (ret)
+		goto attr_err_out;
+	genlmsg_end(skb, msg_head);
+
+	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
+	return;
+attr_err_out:
+	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
+err_out:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL(quota_send_warning);
+
+static int __init quota_init(void)
+{
+	if (genl_register_family(&quota_genl_family) != 0)
+		printk(KERN_ERR
+		       "VFS: Failed to create quota netlink interface.\n");
+	return 0;
+};
+
+module_init(quota_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ee91e27..95388f9 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -10,7 +10,6 @@
 #include <linux/slab.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
-#include <linux/compat.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -18,220 +17,205 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
-#include <net/netlink.h>
-#include <net/genetlink.h>
+#include <linux/writeback.h>
 
-/* Check validity of generic quotactl commands */
-static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
-				  qid_t id)
+static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
+				     qid_t id)
 {
-	if (type >= MAXQUOTAS)
-		return -EINVAL;
-	if (!sb && cmd != Q_SYNC)
-		return -ENODEV;
-	/* Is operation supported? */
-	if (sb && !sb->s_qcop)
-		return -ENOSYS;
-
 	switch (cmd) {
-		case Q_GETFMT:
+	/* these commands do not require any special privilegues */
+	case Q_GETFMT:
+	case Q_SYNC:
+	case Q_GETINFO:
+	case Q_XGETQSTAT:
+	case Q_XQUOTASYNC:
+		break;
+	/* allow to query information for dquots we "own" */
+	case Q_GETQUOTA:
+	case Q_XGETQUOTA:
+		if ((type == USRQUOTA && current_euid() == id) ||
+		    (type == GRPQUOTA && in_egroup_p(id)))
 			break;
-		case Q_QUOTAON:
-			if (!sb->s_qcop->quota_on)
-				return -ENOSYS;
-			break;
-		case Q_QUOTAOFF:
-			if (!sb->s_qcop->quota_off)
-				return -ENOSYS;
-			break;
-		case Q_SETINFO:
-			if (!sb->s_qcop->set_info)
-				return -ENOSYS;
-			break;
-		case Q_GETINFO:
-			if (!sb->s_qcop->get_info)
-				return -ENOSYS;
-			break;
-		case Q_SETQUOTA:
-			if (!sb->s_qcop->set_dqblk)
-				return -ENOSYS;
-			break;
-		case Q_GETQUOTA:
-			if (!sb->s_qcop->get_dqblk)
-				return -ENOSYS;
-			break;
-		case Q_SYNC:
-			if (sb && !sb->s_qcop->quota_sync)
-				return -ENOSYS;
-			break;
-		default:
-			return -EINVAL;
-	}
-
-	/* Is quota turned on for commands which need it? */
-	switch (cmd) {
-		case Q_GETFMT:
-		case Q_GETINFO:
-		case Q_SETINFO:
-		case Q_SETQUOTA:
-		case Q_GETQUOTA:
-			/* This is just an informative test so we are satisfied
-			 * without the lock */
-			if (!sb_has_quota_active(sb, type))
-				return -ESRCH;
-	}
-
-	/* Check privileges */
-	if (cmd == Q_GETQUOTA) {
-		if (((type == USRQUOTA && current_euid() != id) ||
-		     (type == GRPQUOTA && !in_egroup_p(id))) &&
-		    !capable(CAP_SYS_ADMIN))
-			return -EPERM;
-	}
-	else if (cmd != Q_GETFMT && cmd != Q_SYNC && cmd != Q_GETINFO)
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-	return 0;
-}
-
-/* Check validity of XFS Quota Manager commands */
-static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd,
-			      qid_t id)
-{
-	if (type >= XQM_MAXQUOTAS)
-		return -EINVAL;
-	if (!sb)
-		return -ENODEV;
-	if (!sb->s_qcop)
-		return -ENOSYS;
-
-	switch (cmd) {
-		case Q_XQUOTAON:
-		case Q_XQUOTAOFF:
-		case Q_XQUOTARM:
-			if (!sb->s_qcop->set_xstate)
-				return -ENOSYS;
-			break;
-		case Q_XGETQSTAT:
-			if (!sb->s_qcop->get_xstate)
-				return -ENOSYS;
-			break;
-		case Q_XSETQLIM:
-			if (!sb->s_qcop->set_xquota)
-				return -ENOSYS;
-			break;
-		case Q_XGETQUOTA:
-			if (!sb->s_qcop->get_xquota)
-				return -ENOSYS;
-			break;
-		case Q_XQUOTASYNC:
-			if (!sb->s_qcop->quota_sync)
-				return -ENOSYS;
-			break;
-		default:
-			return -EINVAL;
-	}
-
-	/* Check privileges */
-	if (cmd == Q_XGETQUOTA) {
-		if (((type == XQM_USRQUOTA && current_euid() != id) ||
-		     (type == XQM_GRPQUOTA && !in_egroup_p(id))) &&
-		     !capable(CAP_SYS_ADMIN))
-			return -EPERM;
-	} else if (cmd != Q_XGETQSTAT && cmd != Q_XQUOTASYNC) {
+		/*FALLTHROUGH*/
+	default:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 	}
 
-	return 0;
+	return security_quotactl(cmd, type, id, sb);
 }
 
-static int check_quotactl_valid(struct super_block *sb, int type, int cmd,
-				qid_t id)
-{
-	int error;
-
-	if (XQM_COMMAND(cmd))
-		error = xqm_quotactl_valid(sb, type, cmd, id);
-	else
-		error = generic_quotactl_valid(sb, type, cmd, id);
-	if (!error)
-		error = security_quotactl(cmd, type, id, sb);
-	return error;
-}
-
-#ifdef CONFIG_QUOTA
-void sync_quota_sb(struct super_block *sb, int type)
-{
-	int cnt;
-
-	if (!sb->s_qcop->quota_sync)
-		return;
-
-	sb->s_qcop->quota_sync(sb, type);
-
-	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
-		return;
-	/* This is not very clever (and fast) but currently I don't know about
-	 * any other simple way of getting quota data to disk and we must get
-	 * them there for userspace to be visible... */
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
-
-	/*
-	 * Now when everything is written we can discard the pagecache so
-	 * that userspace sees the changes.
-	 */
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (type != -1 && cnt != type)
-			continue;
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex,
-				  I_MUTEX_QUOTA);
-		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
-		mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
-	}
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-}
-#endif
-
-static void sync_dquots(int type)
+static int quota_sync_all(int type)
 {
 	struct super_block *sb;
-	int cnt;
+	int ret;
+
+	if (type >= MAXQUOTAS)
+		return -EINVAL;
+	ret = security_quotactl(Q_SYNC, type, 0, NULL);
+	if (ret)
+		return ret;
 
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		/* This test just improves performance so it needn't be
-		 * reliable... */
-		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-			if (type != -1 && type != cnt)
-				continue;
-			if (!sb_has_quota_active(sb, cnt))
-				continue;
-			if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
-			   list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
-				continue;
-			break;
-		}
-		if (cnt == MAXQUOTAS)
+		if (!sb->s_qcop || !sb->s_qcop->quota_sync)
 			continue;
+
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
 		if (sb->s_root)
-			sync_quota_sb(sb, type);
+			sb->s_qcop->quota_sync(sb, type, 1);
 		up_read(&sb->s_umount);
 		spin_lock(&sb_lock);
 		if (__put_super_and_need_restart(sb))
 			goto restart;
 	}
 	spin_unlock(&sb_lock);
+
+	return 0;
+}
+
+static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+		         void __user *addr)
+{
+	char *pathname;
+	int ret = -ENOSYS;
+
+	pathname = getname(addr);
+	if (IS_ERR(pathname))
+		return PTR_ERR(pathname);
+	if (sb->s_qcop->quota_on)
+		ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
+	putname(pathname);
+	return ret;
+}
+
+static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
+{
+	__u32 fmt;
+
+	down_read(&sb_dqopt(sb)->dqptr_sem);
+	if (!sb_has_quota_active(sb, type)) {
+		up_read(&sb_dqopt(sb)->dqptr_sem);
+		return -ESRCH;
+	}
+	fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
+	up_read(&sb_dqopt(sb)->dqptr_sem);
+	if (copy_to_user(addr, &fmt, sizeof(fmt)))
+		return -EFAULT;
+	return 0;
+}
+
+static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
+{
+	struct if_dqinfo info;
+	int ret;
+
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
+	if (!sb->s_qcop->get_info)
+		return -ENOSYS;
+	ret = sb->s_qcop->get_info(sb, type, &info);
+	if (!ret && copy_to_user(addr, &info, sizeof(info)))
+		return -EFAULT;
+	return ret;
+}
+
+static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
+{
+	struct if_dqinfo info;
+
+	if (copy_from_user(&info, addr, sizeof(info)))
+		return -EFAULT;
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
+	if (!sb->s_qcop->set_info)
+		return -ENOSYS;
+	return sb->s_qcop->set_info(sb, type, &info);
+}
+
+static int quota_getquota(struct super_block *sb, int type, qid_t id,
+			  void __user *addr)
+{
+	struct if_dqblk idq;
+	int ret;
+
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
+	if (!sb->s_qcop->get_dqblk)
+		return -ENOSYS;
+	ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+	if (ret)
+		return ret;
+	if (copy_to_user(addr, &idq, sizeof(idq)))
+		return -EFAULT;
+	return 0;
+}
+
+static int quota_setquota(struct super_block *sb, int type, qid_t id,
+			  void __user *addr)
+{
+	struct if_dqblk idq;
+
+	if (copy_from_user(&idq, addr, sizeof(idq)))
+		return -EFAULT;
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
+	if (!sb->s_qcop->set_dqblk)
+		return -ENOSYS;
+	return sb->s_qcop->set_dqblk(sb, type, id, &idq);
+}
+
+static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
+{
+	__u32 flags;
+
+	if (copy_from_user(&flags, addr, sizeof(flags)))
+		return -EFAULT;
+	if (!sb->s_qcop->set_xstate)
+		return -ENOSYS;
+	return sb->s_qcop->set_xstate(sb, flags, cmd);
+}
+
+static int quota_getxstate(struct super_block *sb, void __user *addr)
+{
+	struct fs_quota_stat fqs;
+	int ret;
+
+	if (!sb->s_qcop->get_xstate)
+		return -ENOSYS;
+	ret = sb->s_qcop->get_xstate(sb, &fqs);
+	if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
+		return -EFAULT;
+	return ret;
+}
+
+static int quota_setxquota(struct super_block *sb, int type, qid_t id,
+			   void __user *addr)
+{
+	struct fs_disk_quota fdq;
+
+	if (copy_from_user(&fdq, addr, sizeof(fdq)))
+		return -EFAULT;
+	if (!sb->s_qcop->set_xquota)
+		return -ENOSYS;
+	return sb->s_qcop->set_xquota(sb, type, id, &fdq);
+}
+
+static int quota_getxquota(struct super_block *sb, int type, qid_t id,
+			   void __user *addr)
+{
+	struct fs_disk_quota fdq;
+	int ret;
+
+	if (!sb->s_qcop->get_xquota)
+		return -ENOSYS;
+	ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
+	if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
+		return -EFAULT;
+	return ret;
 }
 
 /* Copy parameters and call proper function */
@@ -240,117 +224,55 @@
 {
 	int ret;
 
+	if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
+		return -EINVAL;
+	if (!sb->s_qcop)
+		return -ENOSYS;
+
+	ret = check_quotactl_permission(sb, type, cmd, id);
+	if (ret < 0)
+		return ret;
+
 	switch (cmd) {
-		case Q_QUOTAON: {
-			char *pathname;
-
-			pathname = getname(addr);
-			if (IS_ERR(pathname))
-				return PTR_ERR(pathname);
-			ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
-			putname(pathname);
-			return ret;
-		}
-		case Q_QUOTAOFF:
-			return sb->s_qcop->quota_off(sb, type, 0);
-
-		case Q_GETFMT: {
-			__u32 fmt;
-
-			down_read(&sb_dqopt(sb)->dqptr_sem);
-			if (!sb_has_quota_active(sb, type)) {
-				up_read(&sb_dqopt(sb)->dqptr_sem);
-				return -ESRCH;
-			}
-			fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
-			up_read(&sb_dqopt(sb)->dqptr_sem);
-			if (copy_to_user(addr, &fmt, sizeof(fmt)))
-				return -EFAULT;
-			return 0;
-		}
-		case Q_GETINFO: {
-			struct if_dqinfo info;
-
-			ret = sb->s_qcop->get_info(sb, type, &info);
-			if (ret)
-				return ret;
-			if (copy_to_user(addr, &info, sizeof(info)))
-				return -EFAULT;
-			return 0;
-		}
-		case Q_SETINFO: {
-			struct if_dqinfo info;
-
-			if (copy_from_user(&info, addr, sizeof(info)))
-				return -EFAULT;
-			return sb->s_qcop->set_info(sb, type, &info);
-		}
-		case Q_GETQUOTA: {
-			struct if_dqblk idq;
-
-			ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
-			if (ret)
-				return ret;
-			if (copy_to_user(addr, &idq, sizeof(idq)))
-				return -EFAULT;
-			return 0;
-		}
-		case Q_SETQUOTA: {
-			struct if_dqblk idq;
-
-			if (copy_from_user(&idq, addr, sizeof(idq)))
-				return -EFAULT;
-			return sb->s_qcop->set_dqblk(sb, type, id, &idq);
-		}
-		case Q_SYNC:
-			if (sb)
-				sync_quota_sb(sb, type);
-			else
-				sync_dquots(type);
-			return 0;
-
-		case Q_XQUOTAON:
-		case Q_XQUOTAOFF:
-		case Q_XQUOTARM: {
-			__u32 flags;
-
-			if (copy_from_user(&flags, addr, sizeof(flags)))
-				return -EFAULT;
-			return sb->s_qcop->set_xstate(sb, flags, cmd);
-		}
-		case Q_XGETQSTAT: {
-			struct fs_quota_stat fqs;
-		
-			if ((ret = sb->s_qcop->get_xstate(sb, &fqs)))
-				return ret;
-			if (copy_to_user(addr, &fqs, sizeof(fqs)))
-				return -EFAULT;
-			return 0;
-		}
-		case Q_XSETQLIM: {
-			struct fs_disk_quota fdq;
-
-			if (copy_from_user(&fdq, addr, sizeof(fdq)))
-				return -EFAULT;
-		       return sb->s_qcop->set_xquota(sb, type, id, &fdq);
-		}
-		case Q_XGETQUOTA: {
-			struct fs_disk_quota fdq;
-
-			ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
-			if (ret)
-				return ret;
-			if (copy_to_user(addr, &fdq, sizeof(fdq)))
-				return -EFAULT;
-			return 0;
-		}
-		case Q_XQUOTASYNC:
-			return sb->s_qcop->quota_sync(sb, type);
-		/* We never reach here unless validity check is broken */
-		default:
-			BUG();
+	case Q_QUOTAON:
+		return quota_quotaon(sb, type, cmd, id, addr);
+	case Q_QUOTAOFF:
+		if (!sb->s_qcop->quota_off)
+			return -ENOSYS;
+		return sb->s_qcop->quota_off(sb, type, 0);
+	case Q_GETFMT:
+		return quota_getfmt(sb, type, addr);
+	case Q_GETINFO:
+		return quota_getinfo(sb, type, addr);
+	case Q_SETINFO:
+		return quota_setinfo(sb, type, addr);
+	case Q_GETQUOTA:
+		return quota_getquota(sb, type, id, addr);
+	case Q_SETQUOTA:
+		return quota_setquota(sb, type, id, addr);
+	case Q_SYNC:
+		if (!sb->s_qcop->quota_sync)
+			return -ENOSYS;
+		return sb->s_qcop->quota_sync(sb, type, 1);
+	case Q_XQUOTAON:
+	case Q_XQUOTAOFF:
+	case Q_XQUOTARM:
+		return quota_setxstate(sb, cmd, addr);
+	case Q_XGETQSTAT:
+		return quota_getxstate(sb, addr);
+	case Q_XSETQLIM:
+		return quota_setxquota(sb, type, id, addr);
+	case Q_XGETQUOTA:
+		return quota_getxquota(sb, type, id, addr);
+	case Q_XQUOTASYNC:
+		/* caller already holds s_umount */
+		if (sb->s_flags & MS_RDONLY)
+			return -EROFS;
+		writeback_inodes_sb(sb);
+		return 0;
+	default:
+		return -EINVAL;
 	}
-	return 0;
 }
 
 /*
@@ -397,224 +319,23 @@
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
-	if (cmds != Q_SYNC || special) {
-		sb = quotactl_block(special);
-		if (IS_ERR(sb))
-			return PTR_ERR(sb);
+	/*
+	 * As a special case Q_SYNC can be called without a specific device.
+	 * It will iterate all superblocks that have quota enabled and call
+	 * the sync action on each of them.
+	 */
+	if (!special) {
+		if (cmds == Q_SYNC)
+			return quota_sync_all(type);
+		return -ENODEV;
 	}
 
-	ret = check_quotactl_valid(sb, type, cmds, id);
-	if (ret >= 0)
-		ret = do_quotactl(sb, type, cmds, id, addr);
-	if (sb)
-		drop_super(sb);
+	sb = quotactl_block(special);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
 
+	ret = do_quotactl(sb, type, cmds, id, addr);
+
+	drop_super(sb);
 	return ret;
 }
-
-#if defined(CONFIG_COMPAT_FOR_U64_ALIGNMENT)
-/*
- * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
- * and is necessary due to alignment problems.
- */
-struct compat_if_dqblk {
-	compat_u64 dqb_bhardlimit;
-	compat_u64 dqb_bsoftlimit;
-	compat_u64 dqb_curspace;
-	compat_u64 dqb_ihardlimit;
-	compat_u64 dqb_isoftlimit;
-	compat_u64 dqb_curinodes;
-	compat_u64 dqb_btime;
-	compat_u64 dqb_itime;
-	compat_uint_t dqb_valid;
-};
-
-/* XFS structures */
-struct compat_fs_qfilestat {
-	compat_u64 dqb_bhardlimit;
-	compat_u64 qfs_nblks;
-	compat_uint_t qfs_nextents;
-};
-
-struct compat_fs_quota_stat {
-	__s8		qs_version;
-	__u16		qs_flags;
-	__s8		qs_pad;
-	struct compat_fs_qfilestat	qs_uquota;
-	struct compat_fs_qfilestat	qs_gquota;
-	compat_uint_t	qs_incoredqs;
-	compat_int_t	qs_btimelimit;
-	compat_int_t	qs_itimelimit;
-	compat_int_t	qs_rtbtimelimit;
-	__u16		qs_bwarnlimit;
-	__u16		qs_iwarnlimit;
-};
-
-asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
-						qid_t id, void __user *addr)
-{
-	unsigned int cmds;
-	struct if_dqblk __user *dqblk;
-	struct compat_if_dqblk __user *compat_dqblk;
-	struct fs_quota_stat __user *fsqstat;
-	struct compat_fs_quota_stat __user *compat_fsqstat;
-	compat_uint_t data;
-	u16 xdata;
-	long ret;
-
-	cmds = cmd >> SUBCMDSHIFT;
-
-	switch (cmds) {
-	case Q_GETQUOTA:
-		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
-		compat_dqblk = addr;
-		ret = sys_quotactl(cmd, special, id, dqblk);
-		if (ret)
-			break;
-		if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
-			get_user(data, &dqblk->dqb_valid) ||
-			put_user(data, &compat_dqblk->dqb_valid))
-			ret = -EFAULT;
-		break;
-	case Q_SETQUOTA:
-		dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
-		compat_dqblk = addr;
-		ret = -EFAULT;
-		if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
-			get_user(data, &compat_dqblk->dqb_valid) ||
-			put_user(data, &dqblk->dqb_valid))
-			break;
-		ret = sys_quotactl(cmd, special, id, dqblk);
-		break;
-	case Q_XGETQSTAT:
-		fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
-		compat_fsqstat = addr;
-		ret = sys_quotactl(cmd, special, id, fsqstat);
-		if (ret)
-			break;
-		ret = -EFAULT;
-		/* Copying qs_version, qs_flags, qs_pad */
-		if (copy_in_user(compat_fsqstat, fsqstat,
-			offsetof(struct compat_fs_quota_stat, qs_uquota)))
-			break;
-		/* Copying qs_uquota */
-		if (copy_in_user(&compat_fsqstat->qs_uquota,
-			&fsqstat->qs_uquota,
-			sizeof(compat_fsqstat->qs_uquota)) ||
-			get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
-			put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
-			break;
-		/* Copying qs_gquota */
-		if (copy_in_user(&compat_fsqstat->qs_gquota,
-			&fsqstat->qs_gquota,
-			sizeof(compat_fsqstat->qs_gquota)) ||
-			get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
-			put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
-			break;
-		/* Copying the rest */
-		if (copy_in_user(&compat_fsqstat->qs_incoredqs,
-			&fsqstat->qs_incoredqs,
-			sizeof(struct compat_fs_quota_stat) -
-			offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
-			get_user(xdata, &fsqstat->qs_iwarnlimit) ||
-			put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
-			break;
-		ret = 0;
-		break;
-	default:
-		ret = sys_quotactl(cmd, special, id, addr);
-	}
-	return ret;
-}
-#endif
-
-
-#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
-
-/* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
-	.id = GENL_ID_GENERATE,
-	.hdrsize = 0,
-	.name = "VFS_DQUOT",
-	.version = 1,
-	.maxattr = QUOTA_NL_A_MAX,
-};
-
-/**
- * quota_send_warning - Send warning to userspace about exceeded quota
- * @type: The quota type: USRQQUOTA, GRPQUOTA,...
- * @id: The user or group id of the quota that was exceeded
- * @dev: The device on which the fs is mounted (sb->s_dev)
- * @warntype: The type of the warning: QUOTA_NL_...
- *
- * This can be used by filesystems (including those which don't use
- * dquot) to send a message to userspace relating to quota limits.
- *
- */
-
-void quota_send_warning(short type, unsigned int id, dev_t dev,
-			const char warntype)
-{
-	static atomic_t seq;
-	struct sk_buff *skb;
-	void *msg_head;
-	int ret;
-	int msg_size = 4 * nla_total_size(sizeof(u32)) +
-		       2 * nla_total_size(sizeof(u64));
-
-	/* We have to allocate using GFP_NOFS as we are called from a
-	 * filesystem performing write and thus further recursion into
-	 * the fs to free some data could cause deadlocks. */
-	skb = genlmsg_new(msg_size, GFP_NOFS);
-	if (!skb) {
-		printk(KERN_ERR
-		  "VFS: Not enough memory to send quota warning.\n");
-		return;
-	}
-	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
-			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
-	if (!msg_head) {
-		printk(KERN_ERR
-		  "VFS: Cannot store netlink header in quota warning.\n");
-		goto err_out;
-	}
-	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
-	if (ret)
-		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
-	if (ret)
-		goto attr_err_out;
-	genlmsg_end(skb, msg_head);
-
-	genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
-	return;
-attr_err_out:
-	printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
-err_out:
-	kfree_skb(skb);
-}
-EXPORT_SYMBOL(quota_send_warning);
-
-static int __init quota_init(void)
-{
-	if (genl_register_family(&quota_genl_family) != 0)
-		printk(KERN_ERR
-		       "VFS: Failed to create quota netlink interface.\n");
-	return 0;
-};
-
-module_init(quota_init);
-#endif
-
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 65c8727..dc014f7 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -425,7 +425,7 @@
 
 	journal_mark_dirty(th, s, sbh);
 	if (for_unformatted)
-		vfs_dq_free_block_nodirty(inode, 1);
+		dquot_free_block_nodirty(inode, 1);
 }
 
 void reiserfs_free_block(struct reiserfs_transaction_handle *th,
@@ -1049,7 +1049,7 @@
 			       amount_needed, hint->inode->i_uid);
 #endif
 		quota_ret =
-		    vfs_dq_alloc_block_nodirty(hint->inode, amount_needed);
+		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
 		if (quota_ret)	/* Quota exceeded? */
 			return QUOTA_EXCEEDED;
 		if (hint->preallocate && hint->prealloc_size) {
@@ -1058,7 +1058,7 @@
 				       "reiserquota: allocating (prealloc) %d blocks id=%u",
 				       hint->prealloc_size, hint->inode->i_uid);
 #endif
-			quota_ret = vfs_dq_prealloc_block_nodirty(hint->inode,
+			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
 							 hint->prealloc_size);
 			if (quota_ret)
 				hint->preallocate = hint->prealloc_size = 0;
@@ -1092,7 +1092,7 @@
 					       hint->inode->i_uid);
 #endif
 				/* Free not allocated blocks */
-				vfs_dq_free_block_nodirty(hint->inode,
+				dquot_free_block_nodirty(hint->inode,
 					amount_needed + hint->prealloc_size -
 					nr_allocated);
 			}
@@ -1125,7 +1125,7 @@
 			       REISERFS_I(hint->inode)->i_prealloc_count,
 			       hint->inode->i_uid);
 #endif
-		vfs_dq_free_block_nodirty(hint->inode, amount_needed +
+		dquot_free_block_nodirty(hint->inode, amount_needed +
 					 hint->prealloc_size - nr_allocated -
 					 REISERFS_I(hint->inode)->
 					 i_prealloc_count);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index da2dba0..1d9c127 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -289,7 +289,7 @@
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
 	.mmap = reiserfs_file_mmap,
-	.open = generic_file_open,
+	.open = dquot_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2df0f5c..d1da94b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -34,6 +34,9 @@
 	int depth;
 	int err;
 
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
@@ -54,7 +57,7 @@
 		 * after delete_object so that quota updates go into the same transaction as
 		 * stat data deletion */
 		if (!err) 
-			vfs_dq_free_inode(inode);
+			dquot_free_inode(inode);
 
 		if (journal_end(&th, inode->i_sb, jbegin_count))
 			goto out;
@@ -1615,7 +1618,7 @@
 ** to properly mark inodes for datasync and such, but only actually
 ** does something when called for a synchronous update.
 */
-int reiserfs_write_inode(struct inode *inode, int do_sync)
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct reiserfs_transaction_handle th;
 	int jbegin_count = 1;
@@ -1627,7 +1630,7 @@
 	 ** inode needs to reach disk for safety, and they can safely be
 	 ** ignored because the altered inode has already been logged.
 	 */
-	if (do_sync && !(current->flags & PF_MEMALLOC)) {
+	if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
 		reiserfs_write_lock(inode->i_sb);
 		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
 			reiserfs_update_sd(&th, inode);
@@ -1765,10 +1768,10 @@
 
 	BUG_ON(!th->t_trans_id);
 
-	if (vfs_dq_alloc_inode(inode)) {
-		err = -EDQUOT;
+	dquot_initialize(inode);
+	err = dquot_alloc_inode(inode);
+	if (err)
 		goto out_end_trans;
-	}
 	if (!dir->i_nlink) {
 		err = -EPERM;
 		goto out_bad_inode;
@@ -1959,12 +1962,12 @@
 	INODE_PKEY(inode)->k_objectid = 0;
 
 	/* Quota change must be inside a transaction for journaling */
-	vfs_dq_free_inode(inode);
+	dquot_free_inode(inode);
 
       out_end_trans:
 	journal_end(th, th->t_super, th->t_blocks_allocated);
 	/* Drop can be outside and it needs more credits so it's better to have it outside */
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	inode->i_flags |= S_NOQUOTA;
 	make_bad_inode(inode);
 
@@ -3073,6 +3076,8 @@
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
+		dquot_initialize(inode);
+
 		/* version 2 items will be caught by the s_maxbytes check
 		 ** done for us in vmtruncate
 		 */
@@ -3134,8 +3139,7 @@
 						  jbegin_count);
 				if (error)
 					goto out;
-				error =
-				    vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+				error = dquot_transfer(inode, attr);
 				if (error) {
 					journal_end(&th, inode->i_sb,
 						    jbegin_count);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 9d4dcf0..96e4cbb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -546,7 +546,7 @@
 */
 static int drop_new_inode(struct inode *inode)
 {
-	vfs_dq_drop(inode);
+	dquot_drop(inode);
 	make_bad_inode(inode);
 	inode->i_flags |= S_NOQUOTA;
 	iput(inode);
@@ -554,7 +554,7 @@
 }
 
 /* utility function that does setup for reiserfs_new_inode.
-** vfs_dq_init needs lots of credits so it's better to have it
+** dquot_initialize needs lots of credits so it's better to have it
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
 */
@@ -577,7 +577,7 @@
 	} else {
 		inode->i_gid = current_fsgid();
 	}
-	vfs_dq_init(inode);
+	dquot_initialize(inode);
 	return 0;
 }
 
@@ -594,6 +594,8 @@
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
 
+	dquot_initialize(dir);
+
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -666,6 +668,8 @@
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	dquot_initialize(dir);
+
 	if (!(inode = new_inode(dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -739,6 +743,8 @@
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
 
+	dquot_initialize(dir);
+
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 	/* set flag that new packing locality created and new blocks for the content     * of that directory are not displaced yet */
 	REISERFS_I(dir)->new_packing_locality = 1;
@@ -842,6 +848,8 @@
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
+	dquot_initialize(dir);
+
 	reiserfs_write_lock(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval)
@@ -923,6 +931,8 @@
 	unsigned long savelink;
 	int depth;
 
+	dquot_initialize(dir);
+
 	inode = dentry->d_inode;
 
 	/* in this transaction we can be doing at max two balancings and update
@@ -1024,6 +1034,8 @@
 	    2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
 		 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
+	dquot_initialize(parent_dir);
+
 	if (!(inode = new_inode(parent_dir->i_sb))) {
 		return -ENOMEM;
 	}
@@ -1111,6 +1123,8 @@
 	    JOURNAL_PER_BALANCE_CNT * 3 +
 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
+	dquot_initialize(dir);
+
 	reiserfs_write_lock(dir->i_sb);
 	if (inode->i_nlink >= REISERFS_LINK_MAX) {
 		//FIXME: sd_nlink is 32 bit for new files
@@ -1235,6 +1249,9 @@
 	    JOURNAL_PER_BALANCE_CNT * 3 + 5 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_inode = old_dentry->d_inode;
 	new_dentry_inode = new_dentry->d_inode;
 
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 5fa7118..313d39d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1299,7 +1299,7 @@
 		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
 		       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
 #endif
-	vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
 
 	/* Return deleted body length */
 	return ret_value;
@@ -1383,7 +1383,7 @@
 					       quota_cut_bytes, inode->i_uid,
 					       key2type(key));
 #endif
-				vfs_dq_free_space_nodirty(inode,
+				dquot_free_space_nodirty(inode,
 							 quota_cut_bytes);
 			}
 			break;
@@ -1733,7 +1733,7 @@
 		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
 		       quota_cut_bytes, inode->i_uid, '?');
 #endif
-	vfs_dq_free_space_nodirty(inode, quota_cut_bytes);
+	dquot_free_space_nodirty(inode, quota_cut_bytes);
 	return ret_value;
 }
 
@@ -1968,9 +1968,10 @@
 		       key2type(&(key->on_disk_key)));
 #endif
 
-	if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) {
+	retval = dquot_alloc_space_nodirty(inode, pasted_size);
+	if (retval) {
 		pathrelse(search_path);
-		return -EDQUOT;
+		return retval;
 	}
 	init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
 		       pasted_size);
@@ -2024,7 +2025,7 @@
 		       pasted_size, inode->i_uid,
 		       key2type(&(key->on_disk_key)));
 #endif
-	vfs_dq_free_space_nodirty(inode, pasted_size);
+	dquot_free_space_nodirty(inode, pasted_size);
 	return retval;
 }
 
@@ -2062,9 +2063,10 @@
 #endif
 		/* We can't dirty inode here. It would be immediately written but
 		 * appropriate stat item isn't inserted yet... */
-		if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) {
+		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+		if (retval) {
 			pathrelse(path);
-			return -EDQUOT;
+			return retval;
 		}
 	}
 	init_tb_struct(th, &s_ins_balance, th->t_super, path,
@@ -2113,6 +2115,6 @@
 		       quota_bytes, inode->i_uid, head2type(ih));
 #endif
 	if (inode)
-		vfs_dq_free_space_nodirty(inode, quota_bytes);
+		dquot_free_space_nodirty(inode, quota_bytes);
 	return retval;
 }
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b4a7dd0..04bf5d7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -246,7 +246,7 @@
 			retval = remove_save_link_only(s, &save_link_key, 0);
 			continue;
 		}
-		vfs_dq_init(inode);
+		dquot_initialize(inode);
 
 		if (truncate && S_ISDIR(inode->i_mode)) {
 			/* We got a truncate request for a dir which is impossible.
@@ -578,6 +578,11 @@
 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
 }
 
+static void reiserfs_clear_inode(struct inode *inode)
+{
+	dquot_drop(inode);
+}
+
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -590,6 +595,7 @@
 	.destroy_inode = reiserfs_destroy_inode,
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
+	.clear_inode = reiserfs_clear_inode,
 	.delete_inode = reiserfs_delete_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
@@ -616,13 +622,6 @@
 static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
 
 static const struct dquot_operations reiserfs_quota_operations = {
-	.initialize = dquot_initialize,
-	.drop = dquot_drop,
-	.alloc_space = dquot_alloc_space,
-	.alloc_inode = dquot_alloc_inode,
-	.free_space = dquot_free_space,
-	.free_inode = dquot_free_inode,
-	.transfer = dquot_transfer,
 	.write_dquot = reiserfs_write_dquot,
 	.acquire_dquot = reiserfs_acquire_dquot,
 	.release_dquot = reiserfs_release_dquot,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 81f09fa..37d034c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -61,7 +61,6 @@
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 	return dir->i_op->create(dir, dentry, mode, NULL);
 }
 #endif
@@ -69,7 +68,6 @@
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 	return dir->i_op->mkdir(dir, dentry, mode);
 }
 
@@ -81,7 +79,6 @@
 {
 	int error;
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 
 	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
 					I_MUTEX_CHILD, dir->i_sb);
@@ -97,7 +94,6 @@
 {
 	int error;
 	BUG_ON(!mutex_is_locked(&dir->i_mutex));
-	vfs_dq_init(dir);
 
 	reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
 					I_MUTEX_CHILD, dir->i_sb);
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 70e3244..df8a19e 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
-squashfs-y += namei.o super.o symlink.o
+squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2a79603..1cb0d81 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -29,15 +29,14 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
-#include <linux/mutex.h>
 #include <linux/string.h>
 #include <linux/buffer_head.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
 #include "squashfs_fs_i.h"
 #include "squashfs.h"
+#include "decompressor.h"
 
 /*
  * Read the metadata block length, this is stored in the first two
@@ -153,72 +152,10 @@
 	}
 
 	if (compressed) {
-		int zlib_err = 0, zlib_init = 0;
-
-		/*
-		 * Uncompress block.
-		 */
-
-		mutex_lock(&msblk->read_data_mutex);
-
-		msblk->stream.avail_out = 0;
-		msblk->stream.avail_in = 0;
-
-		bytes = length;
-		do {
-			if (msblk->stream.avail_in == 0 && k < b) {
-				avail = min(bytes, msblk->devblksize - offset);
-				bytes -= avail;
-				wait_on_buffer(bh[k]);
-				if (!buffer_uptodate(bh[k]))
-					goto release_mutex;
-
-				if (avail == 0) {
-					offset = 0;
-					put_bh(bh[k++]);
-					continue;
-				}
-
-				msblk->stream.next_in = bh[k]->b_data + offset;
-				msblk->stream.avail_in = avail;
-				offset = 0;
-			}
-
-			if (msblk->stream.avail_out == 0 && page < pages) {
-				msblk->stream.next_out = buffer[page++];
-				msblk->stream.avail_out = PAGE_CACHE_SIZE;
-			}
-
-			if (!zlib_init) {
-				zlib_err = zlib_inflateInit(&msblk->stream);
-				if (zlib_err != Z_OK) {
-					ERROR("zlib_inflateInit returned"
-						" unexpected result 0x%x,"
-						" srclength %d\n", zlib_err,
-						srclength);
-					goto release_mutex;
-				}
-				zlib_init = 1;
-			}
-
-			zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
-
-			if (msblk->stream.avail_in == 0 && k < b)
-				put_bh(bh[k++]);
-		} while (zlib_err == Z_OK);
-
-		if (zlib_err != Z_STREAM_END) {
-			ERROR("zlib_inflate error, data probably corrupt\n");
-			goto release_mutex;
-		}
-
-		zlib_err = zlib_inflateEnd(&msblk->stream);
-		if (zlib_err != Z_OK) {
-			ERROR("zlib_inflate error, data probably corrupt\n");
-			goto release_mutex;
-		}
-		length = msblk->stream.total_out;
-		mutex_unlock(&msblk->read_data_mutex);
+		length = squashfs_decompress(msblk, buffer, bh, b, offset,
+			 length, srclength, pages);
+		if (length < 0)
+			goto read_failure;
 	} else {
 		/*
 		 * Block is uncompressed.
@@ -255,9 +192,6 @@
 	kfree(bh);
 	return length;
 
-release_mutex:
-	mutex_unlock(&msblk->read_data_mutex);
-
 block_release:
 	for (; k < b; k++)
 		put_bh(bh[k]);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 40c98fa..57314be 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -51,7 +51,6 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
-#include <linux/zlib.h>
 #include <linux/pagemap.h>
 
 #include "squashfs_fs.h"
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
new file mode 100644
index 0000000..157478d
--- /dev/null
+++ b/fs/squashfs/decompressor.c
@@ -0,0 +1,68 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * decompressor.c
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file (and decompressor.h) implements a decompressor framework for
+ * Squashfs, allowing multiple decompressors to be easily supported
+ */
+
+static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
+	NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+};
+
+static const struct squashfs_decompressor squashfs_lzo_unsupported_comp_ops = {
+	NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+};
+
+static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
+	NULL, NULL, NULL, 0, "unknown", 0
+};
+
+static const struct squashfs_decompressor *decompressor[] = {
+	&squashfs_zlib_comp_ops,
+	&squashfs_lzma_unsupported_comp_ops,
+	&squashfs_lzo_unsupported_comp_ops,
+	&squashfs_unknown_comp_ops
+};
+
+
+const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
+{
+	int i;
+
+	for (i = 0; decompressor[i]->id; i++)
+		if (id == decompressor[i]->id)
+			break;
+
+	return decompressor[i];
+}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
new file mode 100644
index 0000000..7425f80
--- /dev/null
+++ b/fs/squashfs/decompressor.h
@@ -0,0 +1,55 @@
+#ifndef DECOMPRESSOR_H
+#define DECOMPRESSOR_H
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * decompressor.h
+ */
+
+struct squashfs_decompressor {
+	void	*(*init)(struct squashfs_sb_info *);
+	void	(*free)(void *);
+	int	(*decompress)(struct squashfs_sb_info *, void **,
+		struct buffer_head **, int, int, int, int, int);
+	int	id;
+	char	*name;
+	int	supported;
+};
+
+static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk)
+{
+	return msblk->decompressor->init(msblk);
+}
+
+static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
+	void *s)
+{
+	if (msblk->decompressor)
+		msblk->decompressor->free(s);
+}
+
+static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
+	void **buffer, struct buffer_head **bh, int b, int offset, int length,
+	int srclength, int pages)
+{
+	return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
+		length, srclength, pages);
+}
+#endif
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 566b0ea..12b933a 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -30,7 +30,6 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 2b1b8fe..7f93d5a 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -39,7 +39,6 @@
 #include <linux/vfs.h>
 #include <linux/dcache.h>
 #include <linux/exportfs.h>
-#include <linux/zlib.h>
 #include <linux/slab.h>
 
 #include "squashfs_fs.h"
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 717767d..a25c506 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,7 +47,6 @@
 #include <linux/string.h>
 #include <linux/pagemap.h>
 #include <linux/mutex.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
index b5a2c15..7c90bbd 100644
--- a/fs/squashfs/fragment.c
+++ b/fs/squashfs/fragment.c
@@ -36,7 +36,6 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index 3795b83..b7f64bc 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -34,7 +34,6 @@
 #include <linux/fs.h>
 #include <linux/vfs.h>
 #include <linux/slab.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 9101dbd..49daaf6 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -40,7 +40,6 @@
 
 #include <linux/fs.h>
 #include <linux/vfs.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 9e39865..5266bd8 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -57,7 +57,6 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/dcache.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 0e9feb6..fe2587a 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -51,6 +51,9 @@
 				u64, int);
 extern int squashfs_read_table(struct super_block *, void *, u64, int);
 
+/* decompressor.c */
+extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
+
 /* export.c */
 extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
 				unsigned int);
@@ -71,7 +74,7 @@
 extern int squashfs_read_inode(struct inode *, long long);
 
 /*
- * Inodes and files operations
+ * Inodes, files and decompressor operations
  */
 
 /* dir.c */
@@ -88,3 +91,6 @@
 
 /* symlink.c */
 extern const struct address_space_operations squashfs_symlink_aops;
+
+/* zlib_wrapper.c */
+extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 283daaf..7902424 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -183,8 +183,6 @@
 #define SQUASHFS_MAX_FILE_SIZE		(1LL << \
 					(SQUASHFS_MAX_FILE_SIZE_LOG - 2))
 
-#define SQUASHFS_MARKER_BYTE		0xff
-
 /* meta index cache */
 #define SQUASHFS_META_INDEXES	(SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
 #define SQUASHFS_META_ENTRIES	127
@@ -211,7 +209,9 @@
 /*
  * definitions for structures on disk
  */
-#define ZLIB_COMPRESSION	 1
+#define ZLIB_COMPRESSION	1
+#define LZMA_COMPRESSION	2
+#define LZO_COMPRESSION		3
 
 struct squashfs_super_block {
 	__le32			s_magic;
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index c8c6561..2e77dc5 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -52,25 +52,25 @@
 };
 
 struct squashfs_sb_info {
-	int			devblksize;
-	int			devblksize_log2;
-	struct squashfs_cache	*block_cache;
-	struct squashfs_cache	*fragment_cache;
-	struct squashfs_cache	*read_page;
-	int			next_meta_index;
-	__le64			*id_table;
-	__le64			*fragment_index;
-	unsigned int		*fragment_index_2;
-	struct mutex		read_data_mutex;
-	struct mutex		meta_index_mutex;
-	struct meta_index	*meta_index;
-	z_stream		stream;
-	__le64			*inode_lookup_table;
-	u64			inode_table;
-	u64			directory_table;
-	unsigned int		block_size;
-	unsigned short		block_log;
-	long long		bytes_used;
-	unsigned int		inodes;
+	const struct squashfs_decompressor	*decompressor;
+	int					devblksize;
+	int					devblksize_log2;
+	struct squashfs_cache			*block_cache;
+	struct squashfs_cache			*fragment_cache;
+	struct squashfs_cache			*read_page;
+	int					next_meta_index;
+	__le64					*id_table;
+	__le64					*fragment_index;
+	struct mutex				read_data_mutex;
+	struct mutex				meta_index_mutex;
+	struct meta_index			*meta_index;
+	void					*stream;
+	__le64					*inode_lookup_table;
+	u64					inode_table;
+	u64					directory_table;
+	unsigned int				block_size;
+	unsigned short				block_log;
+	long long				bytes_used;
+	unsigned int				inodes;
 };
 #endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 6c197ef..3550aec 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -35,34 +35,41 @@
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/zlib.h>
 #include <linux/magic.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
 #include "squashfs_fs_i.h"
 #include "squashfs.h"
+#include "decompressor.h"
 
 static struct file_system_type squashfs_fs_type;
 static const struct super_operations squashfs_super_ops;
 
-static int supported_squashfs_filesystem(short major, short minor, short comp)
+static const struct squashfs_decompressor *supported_squashfs_filesystem(short
+	major, short minor, short id)
 {
+	const struct squashfs_decompressor *decompressor;
+
 	if (major < SQUASHFS_MAJOR) {
 		ERROR("Major/Minor mismatch, older Squashfs %d.%d "
 			"filesystems are unsupported\n", major, minor);
-		return -EINVAL;
+		return NULL;
 	} else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) {
 		ERROR("Major/Minor mismatch, trying to mount newer "
 			"%d.%d filesystem\n", major, minor);
 		ERROR("Please update your kernel\n");
-		return -EINVAL;
+		return NULL;
 	}
 
-	if (comp != ZLIB_COMPRESSION)
-		return -EINVAL;
+	decompressor = squashfs_lookup_decompressor(id);
+	if (!decompressor->supported) {
+		ERROR("Filesystem uses \"%s\" compression. This is not "
+			"supported\n", decompressor->name);
+		return NULL;
+	}
 
-	return 0;
+	return decompressor;
 }
 
 
@@ -87,13 +94,6 @@
 	}
 	msblk = sb->s_fs_info;
 
-	msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(),
-		GFP_KERNEL);
-	if (msblk->stream.workspace == NULL) {
-		ERROR("Failed to allocate zlib workspace\n");
-		goto failure;
-	}
-
 	sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
 	if (sblk == NULL) {
 		ERROR("Failed to allocate squashfs_super_block\n");
@@ -120,25 +120,25 @@
 		goto failed_mount;
 	}
 
+	err = -EINVAL;
+
 	/* Check it is a SQUASHFS superblock */
 	sb->s_magic = le32_to_cpu(sblk->s_magic);
 	if (sb->s_magic != SQUASHFS_MAGIC) {
 		if (!silent)
 			ERROR("Can't find a SQUASHFS superblock on %s\n",
 						bdevname(sb->s_bdev, b));
-		err = -EINVAL;
 		goto failed_mount;
 	}
 
-	/* Check the MAJOR & MINOR versions and compression type */
-	err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major),
+	/* Check the MAJOR & MINOR versions and lookup compression type */
+	msblk->decompressor = supported_squashfs_filesystem(
+			le16_to_cpu(sblk->s_major),
 			le16_to_cpu(sblk->s_minor),
 			le16_to_cpu(sblk->compression));
-	if (err < 0)
+	if (msblk->decompressor == NULL)
 		goto failed_mount;
 
-	err = -EINVAL;
-
 	/*
 	 * Check if there's xattrs in the filesystem.  These are not
 	 * supported in this version, so warn that they will be ignored.
@@ -205,6 +205,10 @@
 
 	err = -ENOMEM;
 
+	msblk->stream = squashfs_decompressor_init(msblk);
+	if (msblk->stream == NULL)
+		goto failed_mount;
+
 	msblk->block_cache = squashfs_cache_init("metadata",
 			SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
 	if (msblk->block_cache == NULL)
@@ -292,17 +296,16 @@
 	squashfs_cache_delete(msblk->block_cache);
 	squashfs_cache_delete(msblk->fragment_cache);
 	squashfs_cache_delete(msblk->read_page);
+	squashfs_decompressor_free(msblk, msblk->stream);
 	kfree(msblk->inode_lookup_table);
 	kfree(msblk->fragment_index);
 	kfree(msblk->id_table);
-	kfree(msblk->stream.workspace);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 	kfree(sblk);
 	return err;
 
 failure:
-	kfree(msblk->stream.workspace);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 	return -ENOMEM;
@@ -346,10 +349,10 @@
 		squashfs_cache_delete(sbi->block_cache);
 		squashfs_cache_delete(sbi->fragment_cache);
 		squashfs_cache_delete(sbi->read_page);
+		squashfs_decompressor_free(sbi, sbi->stream);
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);
 		kfree(sbi->meta_index);
-		kfree(sbi->stream.workspace);
 		kfree(sb->s_fs_info);
 		sb->s_fs_info = NULL;
 	}
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index 83d8788..e80be20 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -36,7 +36,6 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/pagemap.h>
-#include <linux/zlib.h>
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
new file mode 100644
index 0000000..4dd70e0
--- /dev/null
+++ b/fs/squashfs/zlib_wrapper.c
@@ -0,0 +1,150 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * zlib_wrapper.c
+ */
+
+
+#include <linux/mutex.h>
+#include <linux/buffer_head.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "decompressor.h"
+
+static void *zlib_init(struct squashfs_sb_info *dummy)
+{
+	z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
+	if (stream == NULL)
+		goto failed;
+	stream->workspace = kmalloc(zlib_inflate_workspacesize(),
+		GFP_KERNEL);
+	if (stream->workspace == NULL)
+		goto failed;
+
+	return stream;
+
+failed:
+	ERROR("Failed to allocate zlib workspace\n");
+	kfree(stream);
+	return NULL;
+}
+
+
+static void zlib_free(void *strm)
+{
+	z_stream *stream = strm;
+
+	if (stream)
+		kfree(stream->workspace);
+	kfree(stream);
+}
+
+
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
+	struct buffer_head **bh, int b, int offset, int length, int srclength,
+	int pages)
+{
+	int zlib_err = 0, zlib_init = 0;
+	int avail, bytes, k = 0, page = 0;
+	z_stream *stream = msblk->stream;
+
+	mutex_lock(&msblk->read_data_mutex);
+
+	stream->avail_out = 0;
+	stream->avail_in = 0;
+
+	bytes = length;
+	do {
+		if (stream->avail_in == 0 && k < b) {
+			avail = min(bytes, msblk->devblksize - offset);
+			bytes -= avail;
+			wait_on_buffer(bh[k]);
+			if (!buffer_uptodate(bh[k]))
+				goto release_mutex;
+
+			if (avail == 0) {
+				offset = 0;
+				put_bh(bh[k++]);
+				continue;
+			}
+
+			stream->next_in = bh[k]->b_data + offset;
+			stream->avail_in = avail;
+			offset = 0;
+		}
+
+		if (stream->avail_out == 0 && page < pages) {
+			stream->next_out = buffer[page++];
+			stream->avail_out = PAGE_CACHE_SIZE;
+		}
+
+		if (!zlib_init) {
+			zlib_err = zlib_inflateInit(stream);
+			if (zlib_err != Z_OK) {
+				ERROR("zlib_inflateInit returned unexpected "
+					"result 0x%x, srclength %d\n",
+					zlib_err, srclength);
+				goto release_mutex;
+			}
+			zlib_init = 1;
+		}
+
+		zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH);
+
+		if (stream->avail_in == 0 && k < b)
+			put_bh(bh[k++]);
+	} while (zlib_err == Z_OK);
+
+	if (zlib_err != Z_STREAM_END) {
+		ERROR("zlib_inflate error, data probably corrupt\n");
+		goto release_mutex;
+	}
+
+	zlib_err = zlib_inflateEnd(stream);
+	if (zlib_err != Z_OK) {
+		ERROR("zlib_inflate error, data probably corrupt\n");
+		goto release_mutex;
+	}
+
+	mutex_unlock(&msblk->read_data_mutex);
+	return stream->total_out;
+
+release_mutex:
+	mutex_unlock(&msblk->read_data_mutex);
+
+	for (; k < b; k++)
+		put_bh(bh[k]);
+
+	return -EIO;
+}
+
+const struct squashfs_decompressor squashfs_zlib_comp_ops = {
+	.init = zlib_init,
+	.free = zlib_free,
+	.decompress = zlib_uncompress,
+	.id = ZLIB_COMPRESSION,
+	.name = "zlib",
+	.supported = 1
+};
+
diff --git a/fs/super.c b/fs/super.c
index aff046b..f35ac60 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -568,7 +568,7 @@
 int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
 	int retval;
-	int remount_rw;
+	int remount_rw, remount_ro;
 
 	if (sb->s_frozen != SB_UNFROZEN)
 		return -EBUSY;
@@ -583,9 +583,12 @@
 	shrink_dcache_sb(sb);
 	sync_filesystem(sb);
 
+	remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
+	remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
+
 	/* If we are remounting RDONLY and current sb is read/write,
 	   make sure there are no rw files opened */
-	if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
+	if (remount_ro) {
 		if (force)
 			mark_files_ro(sb);
 		else if (!fs_may_remount_ro(sb))
@@ -594,7 +597,6 @@
 		if (retval < 0 && retval != -ENOSYS)
 			return -EBUSY;
 	}
-	remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
 
 	if (sb->s_op->remount_fs) {
 		retval = sb->s_op->remount_fs(sb, &flags, data);
@@ -604,6 +606,16 @@
 	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 	if (remount_rw)
 		vfs_dq_quota_on_remount(sb);
+	/*
+	 * Some filesystems modify their metadata via some other path than the
+	 * bdev buffer cache (eg. use a private mapping, or directories in
+	 * pagecache, etc). Also file data modifications go via their own
+	 * mappings. So If we try to mount readonly then copy the filesystem
+	 * from bdev, we could get stale data, so invalidate it to give a best
+	 * effort at coherency.
+	 */
+	if (remount_ro && sb->s_bdev)
+		invalidate_bdev(sb->s_bdev);
 	return 0;
 }
 
@@ -925,6 +937,9 @@
 	if (!mnt)
 		goto out;
 
+	if (flags & MS_KERNMOUNT)
+		mnt->mnt_flags = MNT_INTERNAL;
+
 	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
 		secdata = alloc_secdata();
 		if (!secdata)
diff --git a/fs/sync.c b/fs/sync.c
index 418727a..f557d71 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -34,14 +34,14 @@
 	if (!sb->s_bdi)
 		return 0;
 
-	/* Avoid doing twice syncing and cache pruning for quota sync */
-	if (!wait) {
-		writeout_quota_sb(sb, -1);
-		writeback_inodes_sb(sb);
-	} else {
-		sync_quota_sb(sb, -1);
+	if (sb->s_qcop && sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, -1, wait);
+
+	if (wait)
 		sync_inodes_sb(sb);
-	}
+	else
+		writeback_inodes_sb(sb);
+
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, wait);
 	return __sync_blockdev(sb->s_bdev, wait);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9824743..4573734 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
+#include <linux/writeback.h>
 #include <linux/namei.h>
 #include <asm/byteorder.h>
 #include "sysv.h"
@@ -246,7 +247,7 @@
 	return ERR_PTR(-EIO);
 }
 
-int sysv_write_inode(struct inode *inode, int wait)
+static int __sysv_write_inode(struct inode *inode, int wait)
 {
 	struct super_block * sb = inode->i_sb;
 	struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -296,9 +297,14 @@
 	return 0;
 }
 
+int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+	return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
+}
+
 int sysv_sync_inode(struct inode *inode)
 {
-	return sysv_write_inode(inode, 1);
+	return __sysv_write_inode(inode, 1);
 }
 
 static void sysv_delete_inode(struct inode *inode)
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 53786eb..94cb9b4 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -142,7 +142,7 @@
 
 /* inode.c */
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
-extern int sysv_write_inode(struct inode *, int);
+extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
 extern int sysv_sync_inode(struct inode *);
 extern void sysv_set_inode(struct inode *, dev_t);
 extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 552fb01..401e503 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1120,7 +1120,7 @@
 	if (release)
 		ubifs_release_budget(c, &ino_req);
 	if (IS_SYNC(old_inode))
-		err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
+		err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
 	return err;
 
 out_cancel:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 16a6444..e26c02a 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1011,7 +1011,7 @@
 	/* Is the page fully inside @i_size? */
 	if (page->index < end_index) {
 		if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
-			err = inode->i_sb->s_op->write_inode(inode, 1);
+			err = inode->i_sb->s_op->write_inode(inode, NULL);
 			if (err)
 				goto out_unlock;
 			/*
@@ -1039,7 +1039,7 @@
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (i_size > synced_i_size) {
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 		if (err)
 			goto out_unlock;
 	}
@@ -1242,7 +1242,7 @@
 	if (release)
 		ubifs_release_budget(c, &req);
 	if (IS_SYNC(inode))
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 	return err;
 
 out:
@@ -1316,7 +1316,7 @@
 	 * the inode unless this is a 'datasync()' call.
 	 */
 	if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
-		err = inode->i_sb->s_op->write_inode(inode, 1);
+		err = inode->i_sb->s_op->write_inode(inode, NULL);
 		if (err)
 			return err;
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 43f9d19..4d2f215 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -283,7 +283,7 @@
 /*
  * Note, Linux write-back code calls this without 'i_mutex'.
  */
-static int ubifs_write_inode(struct inode *inode, int wait)
+static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int err = 0;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 82372e3..ccc3ad7 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -208,7 +208,7 @@
 					((char *)bh->b_data)[(bit + i) >> 3]);
 			} else {
 				if (inode)
-					vfs_dq_free_block(inode, 1);
+					dquot_free_block(inode, 1);
 				udf_add_free_space(sb, sbi->s_partition, 1);
 			}
 		}
@@ -260,11 +260,11 @@
 		while (bit < (sb->s_blocksize << 3) && block_count > 0) {
 			if (!udf_test_bit(bit, bh->b_data))
 				goto out;
-			else if (vfs_dq_prealloc_block(inode, 1))
+			else if (dquot_prealloc_block(inode, 1))
 				goto out;
 			else if (!udf_clear_bit(bit, bh->b_data)) {
 				udf_debug("bit already cleared for block %d\n", bit);
-				vfs_dq_free_block(inode, 1);
+				dquot_free_block(inode, 1);
 				goto out;
 			}
 			block_count--;
@@ -390,10 +390,14 @@
 	/*
 	 * Check quota for allocation of this block.
 	 */
-	if (inode && vfs_dq_alloc_block(inode, 1)) {
-		mutex_unlock(&sbi->s_alloc_mutex);
-		*err = -EDQUOT;
-		return 0;
+	if (inode) {
+		int ret = dquot_alloc_block(inode, 1);
+
+		if (ret) {
+			mutex_unlock(&sbi->s_alloc_mutex);
+			*err = ret;
+			return 0;
+		}
 	}
 
 	newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
@@ -449,7 +453,7 @@
 	/* We do this up front - There are some error conditions that
 	   could occure, but.. oh well */
 	if (inode)
-		vfs_dq_free_block(inode, count);
+		dquot_free_block(inode, count);
 	udf_add_free_space(sb, sbi->s_partition, count);
 
 	start = bloc->logicalBlockNum + offset;
@@ -547,7 +551,7 @@
 		}
 
 		if (epos.offset + (2 * adsize) > sb->s_blocksize) {
-			char *sptr, *dptr;
+			unsigned char *sptr, *dptr;
 			int loffset;
 
 			brelse(oepos.bh);
@@ -694,7 +698,7 @@
 		epos.offset -= adsize;
 
 		alloc_count = (elen >> sb->s_blocksize_bits);
-		if (inode && vfs_dq_prealloc_block(inode,
+		if (inode && dquot_prealloc_block(inode,
 			alloc_count > block_count ? block_count : alloc_count))
 			alloc_count = 0;
 		else if (alloc_count > block_count) {
@@ -797,12 +801,13 @@
 	newblock = goal_eloc.logicalBlockNum;
 	goal_eloc.logicalBlockNum++;
 	goal_elen -= sb->s_blocksize;
-
-	if (inode && vfs_dq_alloc_block(inode, 1)) {
-		brelse(goal_epos.bh);
-		mutex_unlock(&sbi->s_alloc_mutex);
-		*err = -EDQUOT;
-		return 0;
+	if (inode) {
+		*err = dquot_alloc_block(inode, 1);
+		if (*err) {
+			brelse(goal_epos.bh);
+			mutex_unlock(&sbi->s_alloc_mutex);
+			return 0;
+		}
 	}
 
 	if (goal_elen)
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 61d9a76..f0f2a43 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -45,8 +45,8 @@
 	int block, iblock;
 	loff_t nf_pos = (filp->f_pos - 1) << 2;
 	int flen;
-	char *fname = NULL;
-	char *nameptr;
+	unsigned char *fname = NULL;
+	unsigned char *nameptr;
 	uint16_t liu;
 	uint8_t lfi;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index f311d50..1eb0677 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,6 +34,7 @@
 #include <linux/errno.h>
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/aio.h>
 
@@ -207,7 +208,7 @@
 	.read			= do_sync_read,
 	.aio_read		= generic_file_aio_read,
 	.ioctl			= udf_ioctl,
-	.open			= generic_file_open,
+	.open			= dquot_file_open,
 	.mmap			= generic_file_mmap,
 	.write			= do_sync_write,
 	.aio_write		= udf_file_aio_write,
@@ -217,6 +218,29 @@
 	.llseek			= generic_file_llseek,
 };
 
+static int udf_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, iattr);
+	if (error)
+		return error;
+
+	if (iattr->ia_valid & ATTR_SIZE)
+		dquot_initialize(inode);
+
+	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
+            (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
+		error = dquot_transfer(inode, iattr);
+		if (error)
+			return error;
+	}
+
+	return inode_setattr(inode, iattr);
+}
+
 const struct inode_operations udf_file_inode_operations = {
-	.truncate = udf_truncate,
+	.truncate		= udf_truncate,
+	.setattr		= udf_setattr,
 };
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index c10fa39..fb68c9c 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -36,8 +36,8 @@
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	vfs_dq_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	clear_inode(inode);
 
@@ -61,7 +61,7 @@
 	struct super_block *sb = dir->i_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 	struct inode *inode;
-	int block;
+	int block, ret;
 	uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
 	struct udf_inode_info *iinfo;
 	struct udf_inode_info *dinfo = UDF_I(dir);
@@ -153,12 +153,14 @@
 	insert_inode_hash(inode);
 	mark_inode_dirty(inode);
 
-	if (vfs_dq_alloc_inode(inode)) {
-		vfs_dq_drop(inode);
+	dquot_initialize(inode);
+	ret = dquot_alloc_inode(inode);
+	if (ret) {
+		dquot_drop(inode);
 		inode->i_flags |= S_NOQUOTA;
 		inode->i_nlink = 0;
 		iput(inode);
-		*err = -EDQUOT;
+		*err = ret;
 		return NULL;
 	}
 
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f90231e..b57ab04 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -36,6 +36,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 
@@ -70,6 +71,9 @@
 
 void udf_delete_inode(struct inode *inode)
 {
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (is_bad_inode(inode))
@@ -108,6 +112,8 @@
 			(unsigned long long)inode->i_size,
 			(unsigned long long)iinfo->i_lenExtents);
 	}
+
+	dquot_drop(inode);
 	kfree(iinfo->i_ext.i_data);
 	iinfo->i_ext.i_data = NULL;
 }
@@ -1373,12 +1379,12 @@
 	return mode;
 }
 
-int udf_write_inode(struct inode *inode, int sync)
+int udf_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 
 	lock_kernel();
-	ret = udf_update_inode(inode, sync);
+	ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 
 	return ret;
@@ -1672,7 +1678,7 @@
 		return -1;
 
 	if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) {
-		char *sptr, *dptr;
+		unsigned char *sptr, *dptr;
 		struct buffer_head *nbh;
 		int err, loffset;
 		struct kernel_lb_addr obloc = epos->block;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index cd21150..db423ab 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -34,8 +34,8 @@
 #include <linux/crc-itu-t.h>
 #include <linux/exportfs.h>
 
-static inline int udf_match(int len1, const char *name1, int len2,
-			    const char *name2)
+static inline int udf_match(int len1, const unsigned char *name1, int len2,
+			    const unsigned char *name2)
 {
 	if (len1 != len2)
 		return 0;
@@ -142,15 +142,15 @@
 }
 
 static struct fileIdentDesc *udf_find_entry(struct inode *dir,
-					    struct qstr *child,
+					    const struct qstr *child,
 					    struct udf_fileident_bh *fibh,
 					    struct fileIdentDesc *cfi)
 {
 	struct fileIdentDesc *fi = NULL;
 	loff_t f_pos;
 	int block, flen;
-	char *fname = NULL;
-	char *nameptr;
+	unsigned char *fname = NULL;
+	unsigned char *nameptr;
 	uint8_t lfi;
 	uint16_t liu;
 	loff_t size;
@@ -308,7 +308,7 @@
 {
 	struct super_block *sb = dir->i_sb;
 	struct fileIdentDesc *fi = NULL;
-	char *name = NULL;
+	unsigned char *name = NULL;
 	int namelen;
 	loff_t f_pos;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
@@ -563,6 +563,8 @@
 	int err;
 	struct udf_inode_info *iinfo;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	inode = udf_new_inode(dir, mode, &err);
 	if (!inode) {
@@ -616,6 +618,8 @@
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	err = -EIO;
 	inode = udf_new_inode(dir, mode, &err);
@@ -662,6 +666,8 @@
 	struct udf_inode_info *dinfo = UDF_I(dir);
 	struct udf_inode_info *iinfo;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	err = -EMLINK;
 	if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
@@ -799,6 +805,8 @@
 	struct fileIdentDesc *fi, cfi;
 	struct kernel_lb_addr tloc;
 
+	dquot_initialize(dir);
+
 	retval = -ENOENT;
 	lock_kernel();
 	fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -845,6 +853,8 @@
 	struct fileIdentDesc cfi;
 	struct kernel_lb_addr tloc;
 
+	dquot_initialize(dir);
+
 	retval = -ENOENT;
 	lock_kernel();
 	fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -885,20 +895,22 @@
 {
 	struct inode *inode;
 	struct pathComponent *pc;
-	char *compstart;
+	const char *compstart;
 	struct udf_fileident_bh fibh;
 	struct extent_position epos = {};
 	int eoffset, elen = 0;
 	struct fileIdentDesc *fi;
 	struct fileIdentDesc cfi;
-	char *ea;
+	uint8_t *ea;
 	int err;
 	int block;
-	char *name = NULL;
+	unsigned char *name = NULL;
 	int namelen;
 	struct buffer_head *bh;
 	struct udf_inode_info *iinfo;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	inode = udf_new_inode(dir, S_IFLNK, &err);
 	if (!inode)
@@ -970,7 +982,7 @@
 
 		pc = (struct pathComponent *)(ea + elen);
 
-		compstart = (char *)symname;
+		compstart = symname;
 
 		do {
 			symname++;
@@ -1069,6 +1081,8 @@
 	int err;
 	struct buffer_head *bh;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
 		unlock_kernel();
@@ -1131,6 +1145,9 @@
 	struct kernel_lb_addr tloc;
 	struct udf_inode_info *old_iinfo = UDF_I(old_inode);
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	lock_kernel();
 	ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
 	if (ofi) {
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index c3265e1..852e918 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -32,12 +32,12 @@
 #include <linux/buffer_head.h>
 #include "udf_i.h"
 
-static void udf_pc_to_char(struct super_block *sb, char *from, int fromlen,
-			   char *to)
+static void udf_pc_to_char(struct super_block *sb, unsigned char *from,
+			   int fromlen, unsigned char *to)
 {
 	struct pathComponent *pc;
 	int elen = 0;
-	char *p = to;
+	unsigned char *p = to;
 
 	while (elen < fromlen) {
 		pc = (struct pathComponent *)(from + elen);
@@ -75,9 +75,9 @@
 {
 	struct inode *inode = page->mapping->host;
 	struct buffer_head *bh = NULL;
-	char *symlink;
+	unsigned char *symlink;
 	int err = -EIO;
-	char *p = kmap(page);
+	unsigned char *p = kmap(page);
 	struct udf_inode_info *iinfo;
 
 	lock_kernel();
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8d46f42..4223ac8 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -142,7 +142,7 @@
 extern void udf_read_inode(struct inode *);
 extern void udf_delete_inode(struct inode *);
 extern void udf_clear_inode(struct inode *);
-extern int udf_write_inode(struct inode *, int);
+extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
 extern long udf_block_map(struct inode *, sector_t);
 extern int udf_extend_file(struct inode *, struct extent_position *,
 			   struct kernel_long_ad *, sector_t);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 54c16ec..5cfa4d8 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -85,7 +85,7 @@
 				   "bit already cleared for fragment %u", i);
 	}
 	
-	vfs_dq_free_block(inode, count);
+	dquot_free_block(inode, count);
 
 	
 	fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
@@ -195,7 +195,7 @@
 		ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 		if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 			ufs_clusteracct (sb, ucpi, blkno, 1);
-		vfs_dq_free_block(inode, uspi->s_fpb);
+		dquot_free_block(inode, uspi->s_fpb);
 
 		fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
 		uspi->cs_total.cs_nbfree++;
@@ -511,6 +511,7 @@
 	struct ufs_cg_private_info * ucpi;
 	struct ufs_cylinder_group * ucg;
 	unsigned cgno, fragno, fragoff, count, fragsize, i;
+	int ret;
 	
 	UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
 	     (unsigned long long)fragment, oldcount, newcount);
@@ -556,8 +557,9 @@
 		fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
 	for (i = oldcount; i < newcount; i++)
 		ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
-	if (vfs_dq_alloc_block(inode, count)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, count);
+	if (ret) {
+		*err = ret;
 		return 0;
 	}
 
@@ -596,6 +598,7 @@
 	struct ufs_cylinder_group * ucg;
 	unsigned oldcg, i, j, k, allocsize;
 	u64 result;
+	int ret;
 	
 	UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
 	     inode->i_ino, cgno, (unsigned long long)goal, count);
@@ -664,7 +667,7 @@
 		for (i = count; i < uspi->s_fpb; i++)
 			ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
 		i = uspi->s_fpb - count;
-		vfs_dq_free_block(inode, i);
+		dquot_free_block(inode, i);
 
 		fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
 		uspi->cs_total.cs_nffree += i;
@@ -676,8 +679,9 @@
 	result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
 	if (result == INVBLOCK)
 		return 0;
-	if (vfs_dq_alloc_block(inode, count)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, count);
+	if (ret) {
+		*err = ret;
 		return 0;
 	}
 	for (i = 0; i < count; i++)
@@ -714,6 +718,7 @@
 	struct ufs_super_block_first * usb1;
 	struct ufs_cylinder_group * ucg;
 	u64 result, blkno;
+	int ret;
 
 	UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
 
@@ -747,8 +752,9 @@
 	ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
 	if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
 		ufs_clusteracct (sb, ucpi, blkno, -1);
-	if (vfs_dq_alloc_block(inode, uspi->s_fpb)) {
-		*err = -EDQUOT;
+	ret = dquot_alloc_block(inode, uspi->s_fpb);
+	if (ret) {
+		*err = ret;
 		return INVBLOCK;
 	}
 
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 22af68f..317a0d4 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -31,7 +31,7 @@
  * len <= UFS_MAXNAMLEN and de != NULL are guaranteed by caller.
  */
 static inline int ufs_match(struct super_block *sb, int len,
-		const char * const name, struct ufs_dir_entry * de)
+		const unsigned char *name, struct ufs_dir_entry *de)
 {
 	if (len != ufs_get_de_namlen(sb, de))
 		return 0;
@@ -70,7 +70,7 @@
 	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
-ino_t ufs_inode_by_name(struct inode *dir, struct qstr *qstr)
+ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
 {
 	ino_t res = 0;
 	struct ufs_dir_entry *de;
@@ -249,11 +249,11 @@
  * (as a parameter - res_dir). Page is returned mapped and unlocked.
  * Entry is guaranteed to be valid.
  */
-struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct qstr *qstr,
+struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
 				     struct page **res_page)
 {
 	struct super_block *sb = dir->i_sb;
-	const char *name = qstr->name;
+	const unsigned char *name = qstr->name;
 	int namelen = qstr->len;
 	unsigned reclen = UFS_DIR_REC_LEN(namelen);
 	unsigned long start, n;
@@ -313,7 +313,7 @@
 int ufs_add_link(struct dentry *dentry, struct inode *inode)
 {
 	struct inode *dir = dentry->d_parent->d_inode;
-	const char *name = dentry->d_name.name;
+	const unsigned char *name = dentry->d_name.name;
 	int namelen = dentry->d_name.len;
 	struct super_block *sb = dir->i_sb;
 	unsigned reclen = UFS_DIR_REC_LEN(namelen);
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 73655c6..a8962ce 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -40,7 +41,7 @@
 	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
-	.open           = generic_file_open,
+	.open           = dquot_file_open,
 	.fsync		= simple_fsync,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 3527c00..230ecf6 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -95,8 +95,8 @@
 
 	is_directory = S_ISDIR(inode->i_mode);
 
-	vfs_dq_free_inode(inode);
-	vfs_dq_drop(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	clear_inode (inode);
 
@@ -355,9 +355,10 @@
 
 	unlock_super (sb);
 
-	if (vfs_dq_alloc_inode(inode)) {
-		vfs_dq_drop(inode);
-		err = -EDQUOT;
+	dquot_initialize(inode);
+	err = dquot_alloc_inode(inode);
+	if (err) {
+		dquot_drop(inode);
 		goto fail_without_unlock;
 	}
 
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 7cf3337..80b68c3 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -36,6 +36,8 @@
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -890,11 +892,11 @@
 	return 0;
 }
 
-int ufs_write_inode (struct inode * inode, int wait)
+int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	int ret;
 	lock_kernel();
-	ret = ufs_update_inode (inode, wait);
+	ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 	unlock_kernel();
 	return ret;
 }
@@ -908,6 +910,9 @@
 {
 	loff_t old_i_size;
 
+	if (!is_bad_inode(inode))
+		dquot_initialize(inode);
+
 	truncate_inode_pages(&inode->i_data, 0);
 	if (is_bad_inode(inode))
 		goto no_delete;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 4c26d9e..1185562 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -30,6 +30,7 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -84,6 +85,9 @@
 	int err;
 
 	UFSD("BEGIN\n");
+
+	dquot_initialize(dir);
+
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
 
@@ -107,6 +111,9 @@
 
 	if (!old_valid_dev(rdev))
 		return -EINVAL;
+
+	dquot_initialize(dir);
+
 	inode = ufs_new_inode(dir, mode);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -131,6 +138,8 @@
 	if (l > sb->s_blocksize)
 		goto out_notlocked;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
 	err = PTR_ERR(inode);
@@ -176,6 +185,8 @@
 		return -EMLINK;
 	}
 
+	dquot_initialize(dir);
+
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
@@ -193,6 +204,8 @@
 	if (dir->i_nlink >= UFS_LINK_MAX)
 		goto out;
 
+	dquot_initialize(dir);
+
 	lock_kernel();
 	inode_inc_link_count(dir);
 
@@ -237,6 +250,8 @@
 	struct page *page;
 	int err = -ENOENT;
 
+	dquot_initialize(dir);
+
 	de = ufs_find_entry(dir, &dentry->d_name, &page);
 	if (!de)
 		goto out;
@@ -281,6 +296,9 @@
 	struct ufs_dir_entry *old_de;
 	int err = -ENOENT;
 
+	dquot_initialize(old_dir);
+	dquot_initialize(new_dir);
+
 	old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
 	if (!old_de)
 		goto out;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 143c20b..66b63a7 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1432,6 +1432,11 @@
 	kmem_cache_destroy(ufs_inode_cachep);
 }
 
+static void ufs_clear_inode(struct inode *inode)
+{
+	dquot_drop(inode);
+}
+
 #ifdef CONFIG_QUOTA
 static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
 static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
@@ -1442,6 +1447,7 @@
 	.destroy_inode	= ufs_destroy_inode,
 	.write_inode	= ufs_write_inode,
 	.delete_inode	= ufs_delete_inode,
+	.clear_inode	= ufs_clear_inode,
 	.put_super	= ufs_put_super,
 	.write_super	= ufs_write_super,
 	.sync_fs	= ufs_sync_fs,
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 41dd431..d3b6270 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -44,6 +44,7 @@
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/sched.h>
+#include <linux/quotaops.h>
 
 #include "ufs_fs.h"
 #include "ufs.h"
@@ -517,9 +518,18 @@
 	if (error)
 		return error;
 
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		error = dquot_transfer(inode, attr);
+		if (error)
+			return error;
+	}
 	if (ia_valid & ATTR_SIZE &&
 	    attr->ia_size != i_size_read(inode)) {
 		loff_t old_i_size = inode->i_size;
+
+		dquot_initialize(inode);
+
 		error = vmtruncate(inode, attr->ia_size);
 		if (error)
 			return error;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 0b4c39b..43f9f5d 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -86,9 +86,9 @@
 /* dir.c */
 extern const struct inode_operations ufs_dir_inode_operations;
 extern int ufs_add_link (struct dentry *, struct inode *);
-extern ino_t ufs_inode_by_name(struct inode *, struct qstr *);
+extern ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
 extern int ufs_make_empty(struct inode *, struct inode *);
-extern struct ufs_dir_entry *ufs_find_entry(struct inode *, struct qstr *, struct page **);
+extern struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *, struct page **);
 extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
 extern int ufs_empty_dir (struct inode *);
 extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
@@ -106,7 +106,7 @@
 
 /* inode.c */
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
-extern int ufs_write_inode (struct inode *, int);
+extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
 extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d4a0c8..1947514 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -44,20 +44,6 @@
 }
 
 STATIC int
-xfs_fs_quota_sync(
-	struct super_block	*sb,
-	int			type)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
-
-	if (sb->s_flags & MS_RDONLY)
-		return -EROFS;
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	return -xfs_sync_data(mp, 0);
-}
-
-STATIC int
 xfs_fs_get_xstate(
 	struct super_block	*sb,
 	struct fs_quota_stat	*fqs)
@@ -82,8 +68,6 @@
 		return -EROFS;
 	if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
 		return -ENOSYS;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 
 	if (uflags & XFS_QUOTA_UDQ_ACCT)
 		flags |= XFS_UQUOTA_ACCT;
@@ -144,14 +128,11 @@
 		return -ENOSYS;
 	if (!XFS_IS_QUOTA_ON(mp))
 		return -ESRCH;
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 
 	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
 }
 
 const struct quotactl_ops xfs_quotactl_operations = {
-	.quota_sync		= xfs_fs_quota_sync,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
 	.get_xquota		= xfs_fs_get_xquota,
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 25ea240..71345a3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1063,7 +1063,7 @@
 STATIC int
 xfs_fs_write_inode(
 	struct inode		*inode,
-	int			sync)
+	struct writeback_control *wbc)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -1074,11 +1074,7 @@
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if (sync) {
-		error = xfs_wait_on_pages(ip, 0, -1);
-		if (error)
-			goto out;
-
+	if (wbc->sync_mode == WB_SYNC_ALL) {
 		/*
 		 * Make sure the inode has hit stable storage.  By using the
 		 * log and the fsync transactions we reduce the IOs we have
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index ffac157..4a3c4e4 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -801,6 +801,7 @@
 	 */
 	int (*gem_init_object) (struct drm_gem_object *obj);
 	void (*gem_free_object) (struct drm_gem_object *obj);
+	void (*gem_free_object_unlocked) (struct drm_gem_object *obj);
 
 	/* vga arb irq handler */
 	void (*vgaarb_irq)(struct drm_device *dev, bool state);
@@ -1427,6 +1428,7 @@
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_free(struct kref *kref);
+void drm_gem_object_free_unlocked(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 void drm_gem_object_handle_free(struct kref *kref);
@@ -1443,10 +1445,15 @@
 static inline void
 drm_gem_object_unreference(struct drm_gem_object *obj)
 {
-	if (obj == NULL)
-		return;
+	if (obj != NULL)
+		kref_put(&obj->refcount, drm_gem_object_free);
+}
 
-	kref_put(&obj->refcount, drm_gem_object_free);
+static inline void
+drm_gem_object_unreference_unlocked(struct drm_gem_object *obj)
+{
+	if (obj != NULL)
+		kref_put(&obj->refcount, drm_gem_object_free_unlocked);
 }
 
 int drm_gem_handle_create(struct drm_file *file_priv,
@@ -1475,6 +1482,21 @@
 	drm_gem_object_unreference(obj);
 }
 
+static inline void
+drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
+{
+	if (obj == NULL)
+		return;
+
+	/*
+	* Must bump handle count first as this may be the last
+	* ref, in which case the object would disappear before we
+	* checked for a name
+	*/
+	kref_put(&obj->handlecount, drm_gem_object_handle_free);
+	drm_gem_object_unreference_unlocked(obj);
+}
+
 struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev,
 					     struct drm_file *filp,
 					     u32 handle);
diff --git a/include/drm/drm_buffer.h b/include/drm/drm_buffer.h
new file mode 100644
index 0000000..322dbff
--- /dev/null
+++ b/include/drm/drm_buffer.h
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Pauli Nieminen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ **************************************************************************/
+/*
+ * Multipart buffer for coping data which is larger than the page size.
+ *
+ * Authors:
+ * Pauli Nieminen <suokkos-at-gmail-dot-com>
+ */
+
+#ifndef _DRM_BUFFER_H_
+#define _DRM_BUFFER_H_
+
+#include "drmP.h"
+
+struct drm_buffer {
+	int iterator;
+	int size;
+	char *data[];
+};
+
+
+/**
+ * Return the index of page that buffer is currently pointing at.
+ */
+static inline int drm_buffer_page(struct drm_buffer *buf)
+{
+	return buf->iterator / PAGE_SIZE;
+}
+/**
+ * Return the index of the current byte in the page
+ */
+static inline int drm_buffer_index(struct drm_buffer *buf)
+{
+	return buf->iterator & (PAGE_SIZE - 1);
+}
+/**
+ * Return number of bytes that is left to process
+ */
+static inline int drm_buffer_unprocessed(struct drm_buffer *buf)
+{
+	return buf->size - buf->iterator;
+}
+
+/**
+ * Advance the buffer iterator number of bytes that is given.
+ */
+static inline void drm_buffer_advance(struct drm_buffer *buf, int bytes)
+{
+	buf->iterator += bytes;
+}
+
+/**
+ * Allocate the drm buffer object.
+ *
+ *   buf: A pointer to a pointer where the object is stored.
+ *   size: The number of bytes to allocate.
+ */
+extern int drm_buffer_alloc(struct drm_buffer **buf, int size);
+
+/**
+ * Copy the user data to the begin of the buffer and reset the processing
+ * iterator.
+ *
+ *   user_data: A pointer the data that is copied to the buffer.
+ *   size: The Number of bytes to copy.
+ */
+extern int drm_buffer_copy_from_user(struct drm_buffer *buf,
+		void __user *user_data, int size);
+
+/**
+ * Free the drm buffer object
+ */
+extern void drm_buffer_free(struct drm_buffer *buf);
+
+/**
+ * Read an object from buffer that may be split to multiple parts. If object
+ * is not split function just returns the pointer to object in buffer. But in
+ * case of split object data is copied to given stack object that is suplied
+ * by caller.
+ *
+ * The processing location of the buffer is also advanced to the next byte
+ * after the object.
+ *
+ *   objsize: The size of the objet in bytes.
+ *   stack_obj: A pointer to a memory location where object can be copied.
+ */
+extern void *drm_buffer_read_object(struct drm_buffer *buf,
+		int objsize, void *stack_obj);
+
+/**
+ * Returns the pointer to the dword which is offset number of elements from the
+ * current processing location.
+ *
+ * Caller must make sure that dword is not split in the buffer. This
+ * requirement is easily met if all the sizes of objects in buffer are
+ * multiples of dword and PAGE_SIZE is multiple dword.
+ *
+ * Call to this function doesn't change the processing location.
+ *
+ *   offset: The index of the dword relative to the internat iterator.
+ */
+static inline void *drm_buffer_pointer_to_dword(struct drm_buffer *buffer,
+		int offset)
+{
+	int iter = buffer->iterator + offset * 4;
+	return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)];
+}
+/**
+ * Returns the pointer to the dword which is offset number of elements from
+ * the current processing location.
+ *
+ * Call to this function doesn't change the processing location.
+ *
+ *   offset: The index of the byte relative to the internat iterator.
+ */
+static inline void *drm_buffer_pointer_to_byte(struct drm_buffer *buffer,
+		int offset)
+{
+	int iter = buffer->iterator + offset;
+	return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)];
+}
+
+#endif
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index fdf43ab..1347524 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -801,4 +801,6 @@
 				bool interlaced, int margins);
 extern int drm_add_modes_noedid(struct drm_connector *connector,
 				int hdisplay, int vdisplay);
+
+extern bool drm_edid_is_valid(struct edid *edid);
 #endif /* __DRM_CRTC_H__ */
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index d33c3e0..b420989 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -201,4 +201,7 @@
 
 #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
 
+/* define the number of Extension EDID block */
+#define DRM_MAX_EDID_EXT_NUM 4
+
 #endif /* __DRM_EDID_H__ */
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index e6f3b12..676104b 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -141,6 +141,41 @@
 	{0x1002, 0x5e4c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5e4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5e4f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6889, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x688A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x689d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x689e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68a0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68a1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68a8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68a9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68b9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68c8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68c9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68d9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68da, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68de, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68fe, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x7100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x7101, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x7102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
@@ -558,4 +593,5 @@
 	{0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+	{0x8086, 0x0102, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
 	{0, 0, 0}
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index f745948..a6a9f4a 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -25,13 +25,14 @@
 #ifndef __NOUVEAU_DRM_H__
 #define __NOUVEAU_DRM_H__
 
-#define NOUVEAU_DRM_HEADER_PATCHLEVEL 15
+#define NOUVEAU_DRM_HEADER_PATCHLEVEL 16
 
 struct drm_nouveau_channel_alloc {
 	uint32_t     fb_ctxdma_handle;
 	uint32_t     tt_ctxdma_handle;
 
 	int          channel;
+	uint32_t     pushbuf_domains;
 
 	/* Notifier memory */
 	uint32_t     notifier_handle;
@@ -109,68 +110,58 @@
 	uint32_t align;
 };
 
+#define NOUVEAU_GEM_MAX_BUFFERS 1024
+struct drm_nouveau_gem_pushbuf_bo_presumed {
+	uint32_t valid;
+	uint32_t domain;
+	uint64_t offset;
+};
+
 struct drm_nouveau_gem_pushbuf_bo {
 	uint64_t user_priv;
 	uint32_t handle;
 	uint32_t read_domains;
 	uint32_t write_domains;
 	uint32_t valid_domains;
-	uint32_t presumed_ok;
-	uint32_t presumed_domain;
-	uint64_t presumed_offset;
+	struct drm_nouveau_gem_pushbuf_bo_presumed presumed;
 };
 
 #define NOUVEAU_GEM_RELOC_LOW  (1 << 0)
 #define NOUVEAU_GEM_RELOC_HIGH (1 << 1)
 #define NOUVEAU_GEM_RELOC_OR   (1 << 2)
+#define NOUVEAU_GEM_MAX_RELOCS 1024
 struct drm_nouveau_gem_pushbuf_reloc {
+	uint32_t reloc_bo_index;
+	uint32_t reloc_bo_offset;
 	uint32_t bo_index;
-	uint32_t reloc_index;
 	uint32_t flags;
 	uint32_t data;
 	uint32_t vor;
 	uint32_t tor;
 };
 
-#define NOUVEAU_GEM_MAX_BUFFERS 1024
-#define NOUVEAU_GEM_MAX_RELOCS 1024
+#define NOUVEAU_GEM_MAX_PUSH 512
+struct drm_nouveau_gem_pushbuf_push {
+	uint32_t bo_index;
+	uint32_t pad;
+	uint64_t offset;
+	uint64_t length;
+};
 
 struct drm_nouveau_gem_pushbuf {
 	uint32_t channel;
-	uint32_t nr_dwords;
 	uint32_t nr_buffers;
-	uint32_t nr_relocs;
-	uint64_t dwords;
 	uint64_t buffers;
-	uint64_t relocs;
-};
-
-struct drm_nouveau_gem_pushbuf_call {
-	uint32_t channel;
-	uint32_t handle;
-	uint32_t offset;
-	uint32_t nr_buffers;
 	uint32_t nr_relocs;
-	uint32_t nr_dwords;
-	uint64_t buffers;
+	uint32_t nr_push;
 	uint64_t relocs;
+	uint64_t push;
 	uint32_t suffix0;
 	uint32_t suffix1;
-	/* below only accessed for CALL2 */
 	uint64_t vram_available;
 	uint64_t gart_available;
 };
 
-struct drm_nouveau_gem_pin {
-	uint32_t handle;
-	uint32_t domain;
-	uint64_t offset;
-};
-
-struct drm_nouveau_gem_unpin {
-	uint32_t handle;
-};
-
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_NOBLOCK                                 0x00000002
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
@@ -183,14 +174,6 @@
 	uint32_t handle;
 };
 
-struct drm_nouveau_gem_tile {
-	uint32_t handle;
-	uint32_t offset;
-	uint32_t size;
-	uint32_t tile_mode;
-	uint32_t tile_flags;
-};
-
 enum nouveau_bus_type {
 	NV_AGP     = 0,
 	NV_PCI     = 1,
@@ -200,22 +183,17 @@
 struct drm_nouveau_sarea {
 };
 
-#define DRM_NOUVEAU_CARD_INIT          0x00
-#define DRM_NOUVEAU_GETPARAM           0x01
-#define DRM_NOUVEAU_SETPARAM           0x02
-#define DRM_NOUVEAU_CHANNEL_ALLOC      0x03
-#define DRM_NOUVEAU_CHANNEL_FREE       0x04
-#define DRM_NOUVEAU_GROBJ_ALLOC        0x05
-#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x06
-#define DRM_NOUVEAU_GPUOBJ_FREE        0x07
+#define DRM_NOUVEAU_GETPARAM           0x00
+#define DRM_NOUVEAU_SETPARAM           0x01
+#define DRM_NOUVEAU_CHANNEL_ALLOC      0x02
+#define DRM_NOUVEAU_CHANNEL_FREE       0x03
+#define DRM_NOUVEAU_GROBJ_ALLOC        0x04
+#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x05
+#define DRM_NOUVEAU_GPUOBJ_FREE        0x06
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
-#define DRM_NOUVEAU_GEM_PUSHBUF_CALL   0x42
-#define DRM_NOUVEAU_GEM_PIN            0x43 /* !KMS only */
-#define DRM_NOUVEAU_GEM_UNPIN          0x44 /* !KMS only */
-#define DRM_NOUVEAU_GEM_CPU_PREP       0x45
-#define DRM_NOUVEAU_GEM_CPU_FINI       0x46
-#define DRM_NOUVEAU_GEM_INFO           0x47
-#define DRM_NOUVEAU_GEM_PUSHBUF_CALL2  0x48
+#define DRM_NOUVEAU_GEM_CPU_PREP       0x42
+#define DRM_NOUVEAU_GEM_CPU_FINI       0x43
+#define DRM_NOUVEAU_GEM_INFO           0x44
 
 #endif /* __NOUVEAU_DRM_H__ */
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 39537f3..81e614b 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -808,6 +808,7 @@
 #define RADEON_TILING_SWAP_32BIT  0x8
 #define RADEON_TILING_SURFACE     0x10 /* this object requires a surface
 					* when mapped - i.e. front buffer */
+#define RADEON_TILING_MICRO_SQUARE 0x20
 
 struct drm_radeon_gem_set_tiling {
 	uint32_t	handle;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4c4e0f8..e3f1b4a 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -908,7 +908,7 @@
  * Utility function that returns the pgprot_t that should be used for
  * setting up a PTE with the caching model indicated by @c_state.
  */
-extern pgprot_t ttm_io_prot(enum ttm_caching_state c_state, pgprot_t tmp);
+extern pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp);
 
 #if (defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE)))
 #define TTM_HAS_AGP
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3c7a358..f391d45 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -424,7 +424,7 @@
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
-extern void __audit_inode_child(const char *dname, const struct dentry *dentry,
+extern void __audit_inode_child(const struct dentry *dentry,
 				const struct inode *parent);
 extern void __audit_ptrace(struct task_struct *t);
 
@@ -442,11 +442,10 @@
 	if (unlikely(!audit_dummy_context()))
 		__audit_inode(name, dentry);
 }
-static inline void audit_inode_child(const char *dname, 
-				     const struct dentry *dentry,
+static inline void audit_inode_child(const struct dentry *dentry,
 				     const struct inode *parent) {
 	if (unlikely(!audit_dummy_context()))
-		__audit_inode_child(dname, dentry, parent);
+		__audit_inode_child(dentry, parent);
 }
 void audit_core_dumps(long signr);
 
@@ -544,9 +543,9 @@
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
 #define __audit_inode(n,d) do { ; } while (0)
-#define __audit_inode_child(d,i,p) do { ; } while (0)
+#define __audit_inode_child(i,p) do { ; } while (0)
 #define audit_inode(n,d) do { ; } while (0)
-#define audit_inode_child(d,i,p) do { ; } while (0)
+#define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 21fd9b7..20ea12c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -31,6 +31,8 @@
  * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
  */
 typedef s32 dma_cookie_t;
+#define DMA_MIN_COOKIE	1
+#define DMA_MAX_COOKIE	INT_MAX
 
 #define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0)
 
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 6b04903..cac84b0 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -202,14 +202,6 @@
 		return flags & EXT3_OTHER_FLMASK;
 }
 
-/*
- * Inode dynamic state flags
- */
-#define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
-#define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
-#define EXT3_STATE_XATTR		0x00000004 /* has in-inode xattrs */
-#define EXT3_STATE_FLUSH_ON_CLOSE	0x00000008
-
 /* Used to pass group descriptor data when online resize is done */
 struct ext3_new_group_input {
 	__u32 group;            /* Group number for this data */
@@ -560,6 +552,31 @@
 		(ino >= EXT3_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
 }
+
+/*
+ * Inode dynamic state flags
+ */
+enum {
+	EXT3_STATE_JDATA,		/* journaled data exists */
+	EXT3_STATE_NEW,			/* inode is newly created */
+	EXT3_STATE_XATTR,		/* has in-inode xattrs */
+	EXT3_STATE_FLUSH_ON_CLOSE,	/* flush dirty pages on close */
+};
+
+static inline int ext3_test_inode_state(struct inode *inode, int bit)
+{
+	return test_bit(bit, &EXT3_I(inode)->i_state);
+}
+
+static inline void ext3_set_inode_state(struct inode *inode, int bit)
+{
+	set_bit(bit, &EXT3_I(inode)->i_state);
+}
+
+static inline void ext3_clear_inode_state(struct inode *inode, int bit)
+{
+	clear_bit(bit, &EXT3_I(inode)->i_state);
+}
 #else
 /* Assume that user mode programs are passing in an ext3fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -877,7 +894,7 @@
 	int create);
 
 extern struct inode *ext3_iget(struct super_block *, unsigned long);
-extern int  ext3_write_inode (struct inode *, int);
+extern int  ext3_write_inode (struct inode *, struct writeback_control *);
 extern int  ext3_setattr (struct dentry *, struct iattr *);
 extern void ext3_delete_inode (struct inode *);
 extern int  ext3_sync_inode (handle_t *, struct inode *);
diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h
index 93e7428..7679acd 100644
--- a/include/linux/ext3_fs_i.h
+++ b/include/linux/ext3_fs_i.h
@@ -87,7 +87,7 @@
 	 * near to their parent directory's inode.
 	 */
 	__u32	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext3 */
+	unsigned long	i_state;	/* Dynamic state flags for ext3 */
 
 	/* block reservation info */
 	struct ext3_block_alloc_info *i_block_alloc_info;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 369767b..c10163b 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -543,6 +543,8 @@
 #define FB_EVENT_GET_REQ                0x0D
 /*      Unbind from the console if possible */
 #define FB_EVENT_FB_UNBIND              0x0E
+/*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */
+#define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
 
 struct fb_event {
 	struct fb_info *info;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ebb1cd5..4568962 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1305,6 +1305,8 @@
 #define MNT_FORCE	0x00000001	/* Attempt to forcibily umount */
 #define MNT_DETACH	0x00000002	/* Just detach from the tree */
 #define MNT_EXPIRE	0x00000004	/* Mark for expiry */
+#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
+#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */
 
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
@@ -1314,9 +1316,9 @@
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
 	dev_t			s_dev;		/* search index; _not_ kdev_t */
-	unsigned long		s_blocksize;
-	unsigned char		s_blocksize_bits;
 	unsigned char		s_dirt;
+	unsigned char		s_blocksize_bits;
+	unsigned long		s_blocksize;
 	loff_t			s_maxbytes;	/* Max file size */
 	struct file_system_type	*s_type;
 	const struct super_operations	*s_op;
@@ -1357,16 +1359,16 @@
 	void 			*s_fs_info;	/* Filesystem private info */
 	fmode_t			s_mode;
 
+	/* Granularity of c/m/atime in ns.
+	   Cannot be worse than a second */
+	u32		   s_time_gran;
+
 	/*
 	 * The next field is for VFS *only*. No filesystems have any business
 	 * even looking at it. You had been warned.
 	 */
 	struct mutex s_vfs_rename_mutex;	/* Kludge */
 
-	/* Granularity of c/m/atime in ns.
-	   Cannot be worse than a second */
-	u32		   s_time_gran;
-
 	/*
 	 * Filesystem subtype.  If non-empty the filesystem type field
 	 * in /proc/mounts will be "type.subtype"
@@ -1555,7 +1557,7 @@
 	void (*destroy_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *);
-	int (*write_inode) (struct inode *, int);
+	int (*write_inode) (struct inode *, struct writeback_control *wbc);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -1794,7 +1796,8 @@
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 extern struct vfsmount *collect_mounts(struct path *);
 extern void drop_collected_mounts(struct vfsmount *);
-
+extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
+			  struct vfsmount *);
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
 
 extern int current_umask(void);
@@ -2058,12 +2061,6 @@
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 
-static inline unsigned long __deprecated
-invalidate_inode_pages(struct address_space *mapping)
-{
-	return invalidate_mapping_pages(mapping, 0, ~0UL);
-}
-
 static inline void invalidate_remote_inode(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
@@ -2132,6 +2129,7 @@
  
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
+extern int path_is_under(struct path *, struct path *);
 extern ino_t find_inode_number(struct dentry *, struct qstr *);
 
 #include <linux/err.h>
@@ -2340,8 +2338,6 @@
 extern int simple_sync_file(struct file *, struct dentry *, int);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
-extern int simple_prepare_write(struct file *file, struct page *page,
-			unsigned offset, unsigned to);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 936f9aa..df8fd9a 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -65,7 +65,7 @@
  * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
  */
 static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
-				 const char *old_name, const char *new_name,
+				 const char *old_name,
 				 int isdir, struct inode *target, struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
@@ -73,6 +73,7 @@
 	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
 	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
+	const char *new_name = moved->d_name.name;
 
 	if (old_dir == new_dir)
 		old_dir_mask |= FS_DN_RENAME;
@@ -103,7 +104,7 @@
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
 		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	}
-	audit_inode_child(new_name, moved, new_dir);
+	audit_inode_child(moved, new_dir);
 }
 
 /*
@@ -146,7 +147,7 @@
 {
 	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
 				  dentry->d_inode);
-	audit_inode_child(dentry->d_name.name, dentry, inode);
+	audit_inode_child(dentry, inode);
 
 	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
@@ -161,7 +162,7 @@
 	inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
 				  inode);
 	fsnotify_link_count(inode);
-	audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
+	audit_inode_child(new_dentry, dir);
 
 	fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
 }
@@ -175,7 +176,7 @@
 	struct inode *d_inode = dentry->d_inode;
 
 	inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
-	audit_inode_child(dentry->d_name.name, dentry, inode);
+	audit_inode_child(dentry, inode);
 
 	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 0ec6129..97e6ab4 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -515,6 +515,8 @@
         u8	init_speed;	/* transfer rate set at boot */
         u8	current_speed;	/* current transfer rate set */
 	u8	desired_speed;	/* desired transfer rate set */
+	u8	pio_mode;	/* for ->set_pio_mode _only_ */
+	u8	dma_mode;	/* for ->dma_pio_mode _only_ */
         u8	dn;		/* now wide spread use */
 	u8	acoustic;	/* acoustic management */
 	u8	media;		/* disk, cdrom, tape, floppy, ... */
@@ -622,8 +624,8 @@
  */
 struct ide_port_ops {
 	void	(*init_dev)(ide_drive_t *);
-	void	(*set_pio_mode)(ide_drive_t *, const u8);
-	void	(*set_dma_mode)(ide_drive_t *, const u8);
+	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
+	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
 	int	(*reset_poll)(ide_drive_t *);
 	void	(*pre_reset)(ide_drive_t *);
 	void	(*resetproc)(ide_drive_t *);
@@ -1494,7 +1496,6 @@
 #ifdef CONFIG_IDE_XFER_MODE
 int ide_scan_pio_blacklist(char *);
 const char *ide_xfer_verbose(u8);
-u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8);
 int ide_pio_need_iordy(ide_drive_t *, const u8);
 int ide_set_pio_mode(ide_drive_t *, u8);
 int ide_set_dma_mode(ide_drive_t *, u8);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 331530c..f3aa59c 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -246,19 +246,8 @@
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
-#if defined(CONFIG_BUFFER_DEBUG)
-void buffer_assertion_failure(struct buffer_head *bh);
-#define J_ASSERT_BH(bh, expr)						\
-	do {								\
-		if (!(expr))						\
-			buffer_assertion_failure(bh);			\
-		J_ASSERT(expr);						\
-	} while (0)
-#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)
-#else
 #define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
-#endif
 
 #if defined(JBD_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 638ce45..1ec8763 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -69,15 +69,8 @@
 #define jbd_debug(f, a...)	/**/
 #endif
 
-static inline void *jbd2_alloc(size_t size, gfp_t flags)
-{
-	return (void *)__get_free_pages(flags, get_order(size));
-}
-
-static inline void jbd2_free(void *ptr, size_t size)
-{
-	free_pages((unsigned long)ptr, get_order(size));
-};
+extern void *jbd2_alloc(size_t size, gfp_t flags);
+extern void jbd2_free(void *ptr, size_t size);
 
 #define JBD2_MIN_JOURNAL_BLOCKS 1024
 
@@ -284,19 +277,8 @@
 
 #define J_ASSERT(assert)	BUG_ON(!(assert))
 
-#if defined(CONFIG_BUFFER_DEBUG)
-void buffer_assertion_failure(struct buffer_head *bh);
-#define J_ASSERT_BH(bh, expr)						\
-	do {								\
-		if (!(expr))						\
-			buffer_assertion_failure(bh);			\
-		J_ASSERT(expr);						\
-	} while (0)
-#define J_ASSERT_JH(jh, expr)	J_ASSERT_BH(jh2bh(jh), expr)
-#else
 #define J_ASSERT_BH(bh, expr)	J_ASSERT(expr)
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
-#endif
 
 #if defined(JBD2_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1b672f7..e7d1b2e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -122,6 +122,11 @@
 /* Kprobe status flags */
 #define KPROBE_FLAG_GONE	1 /* breakpoint has already gone */
 #define KPROBE_FLAG_DISABLED	2 /* probe is temporarily disabled */
+#define KPROBE_FLAG_OPTIMIZED	4 /*
+				   * probe is really optimized.
+				   * NOTE:
+				   * this flag is only for optimized_kprobe.
+				   */
 
 /* Has this kprobe gone ? */
 static inline int kprobe_gone(struct kprobe *p)
@@ -134,6 +139,12 @@
 {
 	return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE);
 }
+
+/* Is this kprobe really running optimized path ? */
+static inline int kprobe_optimized(struct kprobe *p)
+{
+	return p->flags & KPROBE_FLAG_OPTIMIZED;
+}
 /*
  * Special probe type that uses setjmp-longjmp type tricks to resume
  * execution at a specified entry with a matching prototype corresponding
@@ -249,6 +260,39 @@
 extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
+#ifdef CONFIG_OPTPROBES
+/*
+ * Internal structure for direct jump optimized probe
+ */
+struct optimized_kprobe {
+	struct kprobe kp;
+	struct list_head list;	/* list for optimizing queue */
+	struct arch_optimized_insn optinsn;
+};
+
+/* Architecture dependent functions for direct jump optimization */
+extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
+extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
+extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
+extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
+extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
+extern kprobe_opcode_t *get_optinsn_slot(void);
+extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
+extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
+					unsigned long addr);
+
+extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+#ifdef CONFIG_SYSCTL
+extern int sysctl_kprobes_optimization;
+extern int proc_kprobes_optimization_handler(struct ctl_table *table,
+					     int write, void __user *buffer,
+					     size_t *length, loff_t *ppos);
+#endif
+
+#endif /* CONFIG_OPTPROBES */
+
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
 void kretprobe_hash_lock(struct task_struct *tsk,
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a24de0b..60df9c8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -103,7 +103,7 @@
 
 /* for kvm_memory_region::flags */
 #define KVM_MEM_LOG_DIRTY_PAGES  1UL
-
+#define KVM_MEMSLOT_INVALID      (1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -497,6 +497,11 @@
 #endif
 #define KVM_CAP_S390_PSW 42
 #define KVM_CAP_PPC_SEGSTATE 43
+#define KVM_CAP_HYPERV 44
+#define KVM_CAP_HYPERV_VAPIC 45
+#define KVM_CAP_HYPERV_SPIN 46
+#define KVM_CAP_PCI_SEGMENT 47
+#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -691,8 +696,9 @@
 	__u32 busnr;
 	__u32 devfn;
 	__u32 flags;
+	__u32 segnr;
 	union {
-		__u32 reserved[12];
+		__u32 reserved[11];
 	};
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616..a3fd0f9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
 #define KVM_REQ_KICK               9
+#define KVM_REQ_DEACTIVATE_FPU    10
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
@@ -57,20 +58,20 @@
 	struct kvm_io_device *devs[NR_IOBUS_DEVS];
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus);
-void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
-		     const void *val);
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
+enum kvm_bus {
+	KVM_MMIO_BUS,
+	KVM_PIO_BUS,
+	KVM_NR_BUSES
+};
+
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		     int len, const void *val);
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			    struct kvm_io_device *dev);
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev);
-void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev);
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev);
 
 struct kvm_vcpu {
 	struct kvm *kvm;
@@ -83,6 +84,8 @@
 	struct kvm_run *run;
 	unsigned long requests;
 	unsigned long guest_debug;
+	int srcu_idx;
+
 	int fpu_active;
 	int guest_fpu_loaded;
 	wait_queue_head_t wq;
@@ -150,14 +153,19 @@
 
 #endif
 
-struct kvm {
-	spinlock_t mmu_lock;
-	spinlock_t requests_lock;
-	struct rw_semaphore slots_lock;
-	struct mm_struct *mm; /* userspace tied to this vm */
+struct kvm_memslots {
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
 					KVM_PRIVATE_MEM_SLOTS];
+};
+
+struct kvm {
+	spinlock_t mmu_lock;
+	raw_spinlock_t requests_lock;
+	struct mutex slots_lock;
+	struct mm_struct *mm; /* userspace tied to this vm */
+	struct kvm_memslots *memslots;
+	struct srcu_struct srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 	struct kvm_vcpu *bsp_vcpu;
@@ -166,8 +174,7 @@
 	atomic_t online_vcpus;
 	struct list_head vm_list;
 	struct mutex lock;
-	struct kvm_io_bus mmio_bus;
-	struct kvm_io_bus pio_bus;
+	struct kvm_io_bus *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 	struct {
 		spinlock_t        lock;
@@ -249,13 +256,20 @@
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc);
-int kvm_arch_set_memory_region(struct kvm *kvm,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
+				struct kvm_memory_slot old,
+				struct kvm_userspace_memory_region *mem,
+				int user_alloc);
+void kvm_arch_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				struct kvm_memory_slot old,
 				int user_alloc);
 void kvm_disable_largepages(void);
 void kvm_arch_flush_shadow(struct kvm *kvm);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
+gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
@@ -264,6 +278,9 @@
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn);
+int memslot_id(struct kvm *kvm, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
@@ -283,6 +300,7 @@
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
@@ -383,6 +401,7 @@
 	struct work_struct interrupt_work;
 	struct list_head list;
 	int assigned_dev_id;
+	int host_segnr;
 	int host_busnr;
 	int host_devfn;
 	unsigned int entries_nr;
@@ -429,8 +448,7 @@
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
 #ifdef CONFIG_IOMMU_API
-int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
-			unsigned long npages);
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
@@ -480,11 +498,6 @@
 	current->flags &= ~PF_VCPU;
 }
 
-static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	return slot - kvm->memslots;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
@@ -532,6 +545,10 @@
 }
 #endif
 
+#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+#define unalias_gfn_instantiation unalias_gfn
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 
 #define KVM_MAX_IRQ_ROUTES 1024
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 76285e0..eb9800f 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -52,7 +52,6 @@
 #define CGROUP_SUPER_MAGIC	0x27e0eb
 
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
-#define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
 
 #define STACK_END_MAGIC		0x57AC6E9D
 
diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h
index 3568040..94cb51a 100644
--- a/include/linux/mfd/mc13783.h
+++ b/include/linux/mfd/mc13783.h
@@ -108,6 +108,8 @@
 #define	MC13783_REGU_V2		28
 #define	MC13783_REGU_V3		29
 #define	MC13783_REGU_V4		30
+#define	MC13783_REGU_PWGT1SPI	31
+#define	MC13783_REGU_PWGT2SPI	32
 
 #define MC13783_IRQ_ADCDONE	0
 #define MC13783_IRQ_ADCBISDONE	1
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index d74785c..0b89efc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -35,6 +35,7 @@
 extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
+extern int mnt_had_events(struct proc_mounts *);
 
 #endif
 #endif
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b5f43a3..4bd0547 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -34,7 +34,18 @@
 
 #define MNT_SHARED	0x1000	/* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE	0x2000	/* if the vfsmount is a unbindable mount */
-#define MNT_PNODE_MASK	0x3000	/* propagation flag mask */
+/*
+ * MNT_SHARED_MASK is the set of flags that should be cleared when a
+ * mount becomes shared.  Currently, this is only the flag that says a
+ * mount cannot be bind mounted, since this is how we create a mount
+ * that shares events with another mount.  If you add a new MNT_*
+ * flag, consider how it interacts with shared mounts.
+ */
+#define MNT_SHARED_MASK	(MNT_UNBINDABLE)
+#define MNT_PROPAGATION_MASK	(MNT_SHARED | MNT_UNBINDABLE)
+
+
+#define MNT_INTERNAL	0x4000
 
 struct vfsmount {
 	struct list_head mnt_hash;
@@ -123,7 +134,6 @@
 
 extern void mark_mounts_for_expiry(struct list_head *mounts);
 
-extern spinlock_t vfsmount_lock;
 extern dev_t name_to_dev_t(char *name);
 
 #endif /* _LINUX_MOUNT_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d09db1b..1a0b85a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -33,9 +33,6 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
-#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
-#define FLUSH_INVALIDATE	64	/* Invalidate the page cache */
-#define FLUSH_NOWRITEPAGE	128	/* Don't call writepage() */
 
 #ifdef __KERNEL__
 
@@ -166,6 +163,7 @@
 	struct radix_tree_root	nfs_page_tree;
 
 	unsigned long		npages;
+	unsigned long		ncommit;
 
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
@@ -349,7 +347,6 @@
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
-extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
@@ -477,21 +474,12 @@
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
 extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_nocommit(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_write_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_write_data *wdata);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
 #endif
 
 static inline int
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6a2e44f..717a5e5 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -193,6 +193,8 @@
 	int		max_slots;		/* # slots in table */
 	int		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
+	int		target_max_slots;	/* Set by CB_RECALL_SLOT as
+						 * the new max_slots */
 };
 
 static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index a6861f1..b462916 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -279,9 +279,6 @@
 	struct mem_dqblk dq_dqb;	/* Diskquota usage */
 };
 
-#define QUOTA_OK          0
-#define NO_QUOTA          1
-
 /* Operations which must be implemented by each quota format */
 struct quota_format_ops {
 	int (*check_quota_file)(struct super_block *sb, int type);	/* Detect whether file is in our format */
@@ -295,13 +292,6 @@
 
 /* Operations working with dquots */
 struct dquot_operations {
-	int (*initialize) (struct inode *, int);
-	int (*drop) (struct inode *);
-	int (*alloc_space) (struct inode *, qsize_t, int);
-	int (*alloc_inode) (const struct inode *, qsize_t);
-	int (*free_space) (struct inode *, qsize_t);
-	int (*free_inode) (const struct inode *, qsize_t);
-	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
 	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
@@ -309,12 +299,6 @@
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
 	int (*write_info) (struct super_block *, int);	/* Write of quota "superblock" */
-	/* reserve quota for delayed block allocation */
-	int (*reserve_space) (struct inode *, qsize_t, int);
-	/* claim reserved quota for delayed alloc */
-	int (*claim_space) (struct inode *, qsize_t);
-	/* release rsved quota for delayed alloc */
-	void (*release_rsv) (struct inode *, qsize_t);
 	/* get reserved quota for delayed alloc, value returned is managed by
 	 * quota code only */
 	qsize_t *(*get_reserved_space) (struct inode *);
@@ -324,7 +308,7 @@
 struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, char *, int);
 	int (*quota_off)(struct super_block *, int, int);
-	int (*quota_sync)(struct super_block *, int);
+	int (*quota_sync)(struct super_block *, int, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
@@ -357,26 +341,25 @@
 #define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
 				 DQUOT_SUSPENDED)
 /* Other quota flags */
-#define DQUOT_QUOTA_SYS_FILE	(1 << 6)	/* Quota file is a special
+#define DQUOT_STATE_LAST	(_DQUOT_STATE_FLAGS * MAXQUOTAS)
+#define DQUOT_QUOTA_SYS_FILE	(1 << DQUOT_STATE_LAST)
+						/* Quota file is a special
 						 * system file and user cannot
 						 * touch it. Filesystem is
 						 * responsible for setting
 						 * S_NOQUOTA, S_NOATIME flags
 						 */
-#define DQUOT_NEGATIVE_USAGE	(1 << 7)	/* Allow negative quota usage */
+#define DQUOT_NEGATIVE_USAGE	(1 << (DQUOT_STATE_LAST + 1))
+					       /* Allow negative quota usage */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-	if (type == USRQUOTA)
-		return flags;
-	return flags << _DQUOT_STATE_FLAGS;
+	return flags << _DQUOT_STATE_FLAGS * type;
 }
 
 static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
 {
-	if (type == USRQUOTA)
-		return flags;
-	return flags >> _DQUOT_STATE_FLAGS;
+	return (flags >> _DQUOT_STATE_FLAGS * type) & DQUOT_STATE_FLAGS;
 }
 
 #ifdef CONFIG_QUOTA_NETLINK_INTERFACE
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3ebb231..e6fa7ac 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -19,15 +19,12 @@
 /*
  * declaration of quota_function calls in kernel.
  */
-void sync_quota_sb(struct super_block *sb, int type);
-static inline void writeout_quota_sb(struct super_block *sb, int type)
-{
-	if (sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, type);
-}
+void inode_add_rsv_space(struct inode *inode, qsize_t number);
+void inode_claim_rsv_space(struct inode *inode, qsize_t number);
+void inode_sub_rsv_space(struct inode *inode, qsize_t number);
 
-int dquot_initialize(struct inode *inode, int type);
-int dquot_drop(struct inode *inode);
+void dquot_initialize(struct inode *inode);
+void dquot_drop(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
 void dqput(struct dquot *dquot);
 int dquot_scan_active(struct super_block *sb,
@@ -36,24 +33,23 @@
 struct dquot *dquot_alloc(struct super_block *sb, int type);
 void dquot_destroy(struct dquot *dquot);
 
-int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_alloc_inode(const struct inode *inode, qsize_t number);
+int __dquot_alloc_space(struct inode *inode, qsize_t number,
+		int warn, int reserve);
+void __dquot_free_space(struct inode *inode, qsize_t number, int reserve);
 
-int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_claim_space(struct inode *inode, qsize_t number);
-void dquot_release_reserved_space(struct inode *inode, qsize_t number);
-qsize_t dquot_get_reserved_space(struct inode *inode);
+int dquot_alloc_inode(const struct inode *inode);
 
-int dquot_free_space(struct inode *inode, qsize_t number);
-int dquot_free_inode(const struct inode *inode, qsize_t number);
+int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
+void dquot_free_inode(const struct inode *inode);
 
-int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int dquot_commit(struct dquot *dquot);
 int dquot_acquire(struct dquot *dquot);
 int dquot_release(struct dquot *dquot);
 int dquot_commit_info(struct super_block *sb, int type);
 int dquot_mark_dquot_dirty(struct dquot *dquot);
 
+int dquot_file_open(struct inode *inode, struct file *file);
+
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
 int vfs_quota_enable(struct inode *inode, int type, int format_id,
@@ -64,14 +60,13 @@
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);
 int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
-int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_quota_sync(struct super_block *sb, int type, int wait);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 
-void vfs_dq_drop(struct inode *inode);
-int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
 static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
@@ -83,53 +78,56 @@
  * Functions for checking status of quota
  */
 
-static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
+static inline bool sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_USAGE_ENABLED, type);
 }
 
-static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
+static inline bool sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
 }
 
-static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+static inline bool sb_has_quota_suspended(struct super_block *sb, int type)
 {
 	return sb_dqopt(sb)->flags &
 				dquot_state_flag(DQUOT_SUSPENDED, type);
 }
 
-static inline int sb_any_quota_suspended(struct super_block *sb)
+static inline unsigned sb_any_quota_suspended(struct super_block *sb)
 {
-	return sb_has_quota_suspended(sb, USRQUOTA) ||
-		sb_has_quota_suspended(sb, GRPQUOTA);
+	unsigned type, tmsk = 0;
+	for (type = 0; type < MAXQUOTAS; type++)
+		tmsk |= sb_has_quota_suspended(sb, type) << type;
+	return tmsk;
 }
 
 /* Does kernel know about any quota information for given sb + type? */
-static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+static inline bool sb_has_quota_loaded(struct super_block *sb, int type)
 {
 	/* Currently if anything is on, then quota usage is on as well */
 	return sb_has_quota_usage_enabled(sb, type);
 }
 
-static inline int sb_any_quota_loaded(struct super_block *sb)
+static inline unsigned sb_any_quota_loaded(struct super_block *sb)
 {
-	return sb_has_quota_loaded(sb, USRQUOTA) ||
-		sb_has_quota_loaded(sb, GRPQUOTA);
+	unsigned type, tmsk = 0;
+	for (type = 0; type < MAXQUOTAS; type++)
+		tmsk |= sb_has_quota_loaded(sb, type) << type;
+	return	tmsk;
 }
 
-static inline int sb_has_quota_active(struct super_block *sb, int type)
+static inline bool sb_has_quota_active(struct super_block *sb, int type)
 {
 	return sb_has_quota_loaded(sb, type) &&
 	       !sb_has_quota_suspended(sb, type);
 }
 
-static inline int sb_any_quota_active(struct super_block *sb)
+static inline unsigned sb_any_quota_active(struct super_block *sb)
 {
-	return sb_has_quota_active(sb, USRQUOTA) ||
-	       sb_has_quota_active(sb, GRPQUOTA);
+	return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb);
 }
 
 /*
@@ -141,122 +139,6 @@
 #define sb_dquot_ops (&dquot_operations)
 #define sb_quotactl_ops (&vfs_quotactl_ops)
 
-/* It is better to call this function outside of any transaction as it might
- * need a lot of space in journal for dquot structure allocation. */
-static inline void vfs_dq_init(struct inode *inode)
-{
-	BUG_ON(!inode->i_sb);
-	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
-		inode->i_sb->dq_op->initialize(inode, -1);
-}
-
-/* The following allocation/freeing/transfer functions *must* be called inside
- * a transaction (deadlocks possible otherwise) */
-static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
-			return 1;
-	}
-	else
-		inode_add_bytes(inode, nr);
-	return 0;
-}
-
-static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
-{
-	int ret;
-        if (!(ret =  vfs_dq_prealloc_space_nodirty(inode, nr)))
-		mark_inode_dirty(inode);
-	return ret;
-}
-
-static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
-			return 1;
-	}
-	else
-		inode_add_bytes(inode, nr);
-	return 0;
-}
-
-static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
-{
-	int ret;
-	if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
-		mark_inode_dirty(inode);
-	return ret;
-}
-
-static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA)
-			return 1;
-	}
-	return 0;
-}
-
-static inline int vfs_dq_alloc_inode(struct inode *inode)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		vfs_dq_init(inode);
-		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Convert in-memory reserved quotas to real consumed quotas
- */
-static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb)) {
-		if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA)
-			return 1;
-	} else
-		inode_add_bytes(inode, nr);
-
-	mark_inode_dirty(inode);
-	return 0;
-}
-
-/*
- * Release reserved (in-memory) quotas
- */
-static inline
-void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->release_rsv(inode, nr);
-}
-
-static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->free_space(inode, nr);
-	else
-		inode_sub_bytes(inode, nr);
-}
-
-static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
-{
-	vfs_dq_free_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-}
-
-static inline void vfs_dq_free_inode(struct inode *inode)
-{
-	if (sb_any_quota_active(inode->i_sb))
-		inode->i_sb->dq_op->free_inode(inode, 1);
-}
-
 /* Cannot be called inside a transaction */
 static inline int vfs_dq_off(struct super_block *sb, int remount)
 {
@@ -316,28 +198,20 @@
 #define sb_dquot_ops				(NULL)
 #define sb_quotactl_ops				(NULL)
 
-static inline void vfs_dq_init(struct inode *inode)
+static inline void dquot_initialize(struct inode *inode)
 {
 }
 
-static inline void vfs_dq_drop(struct inode *inode)
+static inline void dquot_drop(struct inode *inode)
 {
 }
 
-static inline int vfs_dq_alloc_inode(struct inode *inode)
+static inline int dquot_alloc_inode(const struct inode *inode)
 {
 	return 0;
 }
 
-static inline void vfs_dq_free_inode(struct inode *inode)
-{
-}
-
-static inline void sync_quota_sb(struct super_block *sb, int type)
-{
-}
-
-static inline void writeout_quota_sb(struct super_block *sb, int type)
+static inline void dquot_free_inode(const struct inode *inode)
 {
 }
 
@@ -351,110 +225,116 @@
 	return 0;
 }
 
-static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+static inline int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
 	return 0;
 }
 
-static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
+static inline int __dquot_alloc_space(struct inode *inode, qsize_t number,
+		int warn, int reserve)
 {
-	inode_add_bytes(inode, nr);
+	if (!reserve)
+		inode_add_bytes(inode, number);
 	return 0;
 }
 
-static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
+static inline void __dquot_free_space(struct inode *inode, qsize_t number,
+		int reserve)
 {
-	vfs_dq_prealloc_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
+	if (!reserve)
+		inode_sub_bytes(inode, number);
+}
+
+static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
+{
+	inode_add_bytes(inode, number);
 	return 0;
 }
 
-static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	inode_add_bytes(inode, nr);
-	return 0;
-}
-
-static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
-{
-	vfs_dq_alloc_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-	return 0;
-}
-
-static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr)
-{
-	return 0;
-}
-
-static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr)
-{
-	return vfs_dq_alloc_space(inode, nr);
-}
-
-static inline
-int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr)
-{
-	return 0;
-}
-
-static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
-{
-	inode_sub_bytes(inode, nr);
-}
-
-static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
-{
-	vfs_dq_free_space_nodirty(inode, nr);
-	mark_inode_dirty(inode);
-}	
+#define dquot_file_open		generic_file_open
 
 #endif /* CONFIG_QUOTA */
 
-static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits);
+	return __dquot_alloc_space(inode, nr, 1, 0);
 }
 
-static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_space(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits);
+	int ret;
+
+	ret = dquot_alloc_space_nodirty(inode, nr);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
+	return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits);
 }
 
-static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
+static inline int dquot_alloc_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
+	return dquot_alloc_space(inode, nr << inode->i_blkbits);
 }
 
-static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr)
+static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_reserve_space(inode, nr << inode->i_blkbits);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0);
 }
 
-static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr)
+static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr)
 {
-	return vfs_dq_claim_space(inode, nr << inode->i_blkbits);
+	int ret;
+
+	ret = dquot_prealloc_block_nodirty(inode, nr);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline
-void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr)
+static inline int dquot_reserve_block(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits);
+	return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1);
 }
 
-static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
+static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits);
+	int ret;
+
+	ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits);
+	if (!ret)
+		mark_inode_dirty(inode);
+	return ret;
 }
 
-static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
+static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	vfs_dq_free_space(inode, nr << inode->i_blkbits);
+	__dquot_free_space(inode, nr, 0);
+}
+
+static inline void dquot_free_space(struct inode *inode, qsize_t nr)
+{
+	dquot_free_space_nodirty(inode, nr);
+	mark_inode_dirty(inode);
+}
+
+static inline void dquot_free_block_nodirty(struct inode *inode, qsize_t nr)
+{
+	dquot_free_space_nodirty(inode, nr << inode->i_blkbits);
+}
+
+static inline void dquot_free_block(struct inode *inode, qsize_t nr)
+{
+	dquot_free_space(inode, nr << inode->i_blkbits);
+}
+
+static inline void dquot_release_reservation_block(struct inode *inode,
+		qsize_t nr)
+{
+	__dquot_free_space(inode, nr << inode->i_blkbits, 1);
 }
 
 #endif /* _LINUX_QUOTAOPS_ */
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 030d922..28c9fd0 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -89,8 +89,9 @@
  * REGULATION_OUT Regulator output is out of regulation.
  * FAIL           Regulator output has failed.
  * OVER_TEMP      Regulator over temp.
- * FORCE_DISABLE  Regulator shut down by software.
+ * FORCE_DISABLE  Regulator forcibly shut down by software.
  * VOLTAGE_CHANGE Regulator voltage changed.
+ * DISABLE        Regulator was disabled.
  *
  * NOTE: These events can be OR'ed together when passed into handler.
  */
@@ -102,6 +103,7 @@
 #define REGULATOR_EVENT_OVER_TEMP		0x10
 #define REGULATOR_EVENT_FORCE_DISABLE		0x20
 #define REGULATOR_EVENT_VOLTAGE_CHANGE		0x40
+#define REGULATOR_EVENT_DISABLE 		0x80
 
 struct regulator;
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 31f2055..592cd7c 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -58,6 +58,9 @@
  * @get_optimum_mode: Get the most efficient operating mode for the regulator
  *                    when running with the specified parameters.
  *
+ * @enable_time: Time taken for the regulator voltage output voltage to
+ *               stabalise after being enabled, in microseconds.
+ *
  * @set_suspend_voltage: Set the voltage for the regulator when the system
  *                       is suspended.
  * @set_suspend_enable: Mark the regulator as enabled when the system is
@@ -93,6 +96,9 @@
 	int (*set_mode) (struct regulator_dev *, unsigned int mode);
 	unsigned int (*get_mode) (struct regulator_dev *);
 
+	/* Time taken to enable the regulator */
+	int (*enable_time) (struct regulator_dev *);
+
 	/* report regulator status ... most other accessors report
 	 * control inputs, this reports results of combining inputs
 	 * from Linux (and other sources) with the actual load.
diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
index e94a4a1..ffd7d50 100644
--- a/include/linux/regulator/fixed.h
+++ b/include/linux/regulator/fixed.h
@@ -25,6 +25,7 @@
  * @microvolts:		Output voltage of regulator
  * @gpio:		GPIO to use for enable control
  * 			set to -EINVAL if not used
+ * @startup_delay:	Start-up time in microseconds
  * @enable_high:	Polarity of enable GPIO
  *			1 = Active high, 0 = Active low
  * @enabled_at_boot:	Whether regulator has been enabled at
@@ -41,6 +42,7 @@
 	const char *supply_name;
 	int microvolts;
 	int gpio;
+	unsigned startup_delay;
 	unsigned enable_high:1;
 	unsigned enabled_at_boot:1;
 	struct regulator_init_data *init_data;
diff --git a/include/linux/regulator/max8649.h b/include/linux/regulator/max8649.h
new file mode 100644
index 0000000..417d14e
--- /dev/null
+++ b/include/linux/regulator/max8649.h
@@ -0,0 +1,44 @@
+/*
+ * Interface of Maxim max8649
+ *
+ * Copyright (C) 2009-2010 Marvell International Ltd.
+ *      Haojian Zhuang <haojian.zhuang@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_REGULATOR_MAX8649_H
+#define	__LINUX_REGULATOR_MAX8649_H
+
+#include <linux/regulator/machine.h>
+
+enum {
+	MAX8649_EXTCLK_26MHZ = 0,
+	MAX8649_EXTCLK_13MHZ,
+	MAX8649_EXTCLK_19MHZ,	/* 19.2MHz */
+};
+
+enum {
+	MAX8649_RAMP_32MV = 0,
+	MAX8649_RAMP_16MV,
+	MAX8649_RAMP_8MV,
+	MAX8649_RAMP_4MV,
+	MAX8649_RAMP_2MV,
+	MAX8649_RAMP_1MV,
+	MAX8649_RAMP_0_5MV,
+	MAX8649_RAMP_0_25MV,
+};
+
+struct max8649_platform_data {
+	struct regulator_init_data *regulator;
+
+	unsigned	mode:2;		/* bit[1:0] = VID1,VID0 */
+	unsigned	extclk_freq:2;
+	unsigned	extclk:1;
+	unsigned	ramp_timing:3;
+	unsigned	ramp_down:1;
+};
+
+#endif	/* __LINUX_REGULATOR_MAX8649_H */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 1ba3cf6..3b603f4 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2034,7 +2034,7 @@
 int reiserfs_find_actor(struct inode *inode, void *p);
 int reiserfs_init_locked_inode(struct inode *inode, void *p);
 void reiserfs_delete_inode(struct inode *inode);
-int reiserfs_write_inode(struct inode *inode, int);
+int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 int reiserfs_get_block(struct inode *inode, sector_t block,
 		       struct buffer_head *bh_result, int create);
 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 6508f0d..d7152b4 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -38,12 +38,27 @@
 void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs);
 void bc_release_request(struct rpc_task *);
 int bc_send(struct rpc_rqst *req);
+
+/*
+ * Determine if a shared backchannel is in use
+ */
+static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_server->bc_xprt)
+		return 1;
+	return 0;
+}
 #else /* CONFIG_NFS_V4_1 */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
 					 unsigned int min_reqs)
 {
 	return 0;
 }
+
+static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
new file mode 100644
index 0000000..ae9ab13
--- /dev/null
+++ b/include/linux/vga_switcheroo.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2010 Red Hat Inc.
+ * Author : Dave Airlie <airlied@redhat.com>
+ *
+ * Licensed under GPLv2
+ *
+ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
+ */
+
+#include <linux/fb.h>
+
+enum vga_switcheroo_state {
+	VGA_SWITCHEROO_OFF,
+	VGA_SWITCHEROO_ON,
+};
+
+enum vga_switcheroo_client_id {
+	VGA_SWITCHEROO_IGD,
+	VGA_SWITCHEROO_DIS,
+	VGA_SWITCHEROO_MAX_CLIENTS,
+};
+
+struct vga_switcheroo_handler {
+	int (*switchto)(enum vga_switcheroo_client_id id);
+	int (*power_state)(enum vga_switcheroo_client_id id,
+			   enum vga_switcheroo_state state);
+	int (*init)(void);
+	int (*get_client_id)(struct pci_dev *pdev);
+};
+
+
+#if defined(CONFIG_VGA_SWITCHEROO)
+void vga_switcheroo_unregister_client(struct pci_dev *dev);
+int vga_switcheroo_register_client(struct pci_dev *dev,
+				   void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+				   bool (*can_switch)(struct pci_dev *dev));
+
+void vga_switcheroo_client_fb_set(struct pci_dev *dev,
+				  struct fb_info *info);
+
+int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler);
+void vga_switcheroo_unregister_handler(void);
+
+int vga_switcheroo_process_delayed_switch(void);
+
+#else
+
+static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
+static inline int vga_switcheroo_register_client(struct pci_dev *dev,
+					  void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state),
+					  bool (*can_switch)(struct pci_dev *dev)) { return 0; }
+static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {}
+static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
+static inline void vga_switcheroo_unregister_handler(void) {}
+static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
+
+#endif
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h
index 095e10d..3322750 100644
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -5,7 +5,4 @@
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
-/* Maximum number of virtio channels per partition (1 for now) */
-#define MAX_9P_CHAN	1
-
 #endif /* _LINUX_VIRTIO_9P_H */
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index fb00b32..52e1fff 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -29,6 +29,19 @@
 /* Number of requests per row */
 #define P9_ROW_MAXTAG 255
 
+/** enum p9_proto_versions - 9P protocol versions
+ * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u
+ * @p9_proto_2000u: 9P2000.u extension
+ * @p9_proto_2010L: 9P2010.L extension
+ */
+
+enum p9_proto_versions{
+	p9_proto_legacy = 0,
+	p9_proto_2000u = 1,
+	p9_proto_2010L = 2,
+};
+
+
 /**
  * enum p9_trans_status - different states of underlying transports
  * @Connected: transport is connected and healthy
@@ -111,6 +124,7 @@
  * @lock: protect @fidlist
  * @msize: maximum data size negotiated by protocol
  * @dotu: extension flags negotiated by protocol
+ * @proto_version: 9P protocol version to use
  * @trans_mod: module API instantiated with this client
  * @trans: tranport instance state and API
  * @conn: connection state information used by trans_fd
@@ -137,7 +151,7 @@
 struct p9_client {
 	spinlock_t lock; /* protect client structure */
 	int msize;
-	unsigned char dotu;
+	unsigned char proto_version;
 	struct p9_trans_module *trans_mod;
 	enum p9_trans_status status;
 	void *trans;
@@ -209,5 +223,7 @@
 int p9stat_read(char *, int, struct p9_wstat *, int);
 void p9stat_free(struct p9_wstat *);
 
+int p9_is_proto_dotu(struct p9_client *clnt);
+int p9_is_proto_dotl(struct p9_client *clnt);
 
 #endif /* NET_9P_CLIENT_H */
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d0b6cd3..2aa6aa3 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -874,6 +874,107 @@
 		  __entry->mode, __entry->is_metadata, __entry->block)
 );
 
+TRACE_EVENT(ext4_da_update_reserve_space,
+	TP_PROTO(struct inode *inode, int used_blocks),
+
+	TP_ARGS(inode, used_blocks),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	umode_t, mode			)
+		__field(	__u64,	i_blocks		)
+		__field(	int,	used_blocks		)
+		__field(	int,	reserved_data_blocks	)
+		__field(	int,	reserved_meta_blocks	)
+		__field(	int,	allocated_meta_blocks	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->mode	= inode->i_mode;
+		__entry->i_blocks = inode->i_blocks;
+		__entry->used_blocks = used_blocks;
+		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
+		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
+	),
+
+	TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
+		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+		  __entry->mode,  (unsigned long long) __entry->i_blocks,
+		  __entry->used_blocks, __entry->reserved_data_blocks,
+		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
+);
+
+TRACE_EVENT(ext4_da_reserve_space,
+	TP_PROTO(struct inode *inode, int md_needed),
+
+	TP_ARGS(inode, md_needed),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	umode_t, mode			)
+		__field(	__u64,	i_blocks		)
+		__field(	int,	md_needed		)
+		__field(	int,	reserved_data_blocks	)
+		__field(	int,	reserved_meta_blocks	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->mode	= inode->i_mode;
+		__entry->i_blocks = inode->i_blocks;
+		__entry->md_needed = md_needed;
+		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
+	),
+
+	TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
+		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->md_needed, __entry->reserved_data_blocks,
+		  __entry->reserved_meta_blocks)
+);
+
+TRACE_EVENT(ext4_da_release_space,
+	TP_PROTO(struct inode *inode, int freed_blocks),
+
+	TP_ARGS(inode, freed_blocks),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	ino_t,	ino			)
+		__field(	umode_t, mode			)
+		__field(	__u64,	i_blocks		)
+		__field(	int,	freed_blocks		)
+		__field(	int,	reserved_data_blocks	)
+		__field(	int,	reserved_meta_blocks	)
+		__field(	int,	allocated_meta_blocks	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->mode	= inode->i_mode;
+		__entry->i_blocks = inode->i_blocks;
+		__entry->freed_blocks = freed_blocks;
+		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
+		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
+		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
+	),
+
+	TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
+		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->freed_blocks, __entry->reserved_data_blocks,
+		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
+);
+
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 96b370a..bf16545 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -199,6 +199,34 @@
 		  __entry->forced_to_close, __entry->written, __entry->dropped)
 );
 
+TRACE_EVENT(jbd2_cleanup_journal_tail,
+
+	TP_PROTO(journal_t *journal, tid_t first_tid,
+		 unsigned long block_nr, unsigned long freed),
+
+	TP_ARGS(journal, first_tid, block_nr, freed),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	tid_t,	tail_sequence		)
+		__field(	tid_t,	first_tid		)
+		__field(unsigned long,	block_nr		)
+		__field(unsigned long,	freed			)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= journal->j_fs_dev->bd_dev;
+		__entry->tail_sequence	= journal->j_tail_sequence;
+		__entry->first_tid	= first_tid;
+		__entry->block_nr	= block_nr;
+		__entry->freed		= freed;
+	),
+
+	TP_printk("dev %s from %u to %u offset %lu freed %lu",
+		  jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
+		  __entry->first_tid, __entry->block_nr, __entry->freed)
+);
+
 #endif /* _TRACE_JBD2_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index dbe1084..b17d49d 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -145,6 +145,47 @@
 		  __entry->len, __entry->gpa, __entry->val)
 );
 
+#define kvm_fpu_load_symbol	\
+	{0, "unload"},		\
+	{1, "load"}
+
+TRACE_EVENT(kvm_fpu,
+	TP_PROTO(int load),
+	TP_ARGS(load),
+
+	TP_STRUCT__entry(
+		__field(	u32,	        load		)
+	),
+
+	TP_fast_assign(
+		__entry->load		= load;
+	),
+
+	TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
+);
+
+TRACE_EVENT(kvm_age_page,
+	TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
+	TP_ARGS(hva, slot, ref),
+
+	TP_STRUCT__entry(
+		__field(	u64,	hva		)
+		__field(	u64,	gfn		)
+		__field(	u8,	referenced	)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+		__entry->gfn		=
+		  slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
+		__entry->referenced	= ref;
+	),
+
+	TP_printk("hva %llx gfn %llx %s",
+		  __entry->hva, __entry->gfn,
+		  __entry->referenced ? "YOUNG" : "OLD")
+);
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 614241b..2b10853 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -30,11 +30,7 @@
 	extern char * envp_init[];
 
 	sys_close(old_fd);sys_close(root_fd);
-	sys_close(0);sys_close(1);sys_close(2);
 	sys_setsid();
-	(void) sys_open("/dev/console",O_RDWR,0);
-	(void) sys_dup(0);
-	(void) sys_dup(0);
 	return kernel_execve(shell, argv, envp_init);
 }
 
diff --git a/init/main.c b/init/main.c
index 1809815..40aaa02 100644
--- a/init/main.c
+++ b/init/main.c
@@ -822,11 +822,6 @@
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
-	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
-		printk(KERN_WARNING "Warning: unable to open an initial console.\n");
-
-	(void) sys_dup(0);
-	(void) sys_dup(0);
 
 	current->signal->flags |= SIGNAL_UNKILLABLE;
 
@@ -889,6 +884,12 @@
 
 	do_basic_setup();
 
+	/* Open the /dev/console on the rootfs, this should never fail */
+	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+		printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+	(void) sys_dup(0);
+	(void) sys_dup(0);
 	/*
 	 * check if there is an early userspace init.  If yes, let it do all
 	 * the work
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c79bd57..b6cb064 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -134,7 +134,6 @@
 			init_waitqueue_head(&info->wait_q);
 			INIT_LIST_HEAD(&info->e_wait_q[0].list);
 			INIT_LIST_HEAD(&info->e_wait_q[1].list);
-			info->messages = NULL;
 			info->notify_owner = NULL;
 			info->qsize = 0;
 			info->user = NULL;	/* set when all is ok */
@@ -146,6 +145,10 @@
 				info->attr.mq_msgsize = attr->mq_msgsize;
 			}
 			mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+			info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+			if (!info->messages)
+				goto out_inode;
+
 			mq_bytes = (mq_msg_tblsz +
 				(info->attr.mq_maxmsg * info->attr.mq_msgsize));
 
@@ -154,18 +157,12 @@
 		 	    u->mq_bytes + mq_bytes >
 			    p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
 				spin_unlock(&mq_lock);
+				kfree(info->messages);
 				goto out_inode;
 			}
 			u->mq_bytes += mq_bytes;
 			spin_unlock(&mq_lock);
 
-			info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
-			if (!info->messages) {
-				spin_lock(&mq_lock);
-				u->mq_bytes -= mq_bytes;
-				spin_unlock(&mq_lock);
-				goto out_inode;
-			}
 			/* all is ok */
 			info->user = get_uid(u);
 		} else if (S_ISDIR(mode)) {
@@ -187,7 +184,7 @@
 {
 	struct inode *inode;
 	struct ipc_namespace *ns = data;
-	int error = 0;
+	int error;
 
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -205,7 +202,9 @@
 	if (!sb->s_root) {
 		iput(inode);
 		error = -ENOMEM;
+		goto out;
 	}
+	error = 0;
 
 out:
 	return error;
@@ -264,8 +263,9 @@
 
 	clear_inode(inode);
 
-	mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
-		   (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+	/* Total amount of bytes accounted for the mqueue */
+	mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
+	    + info->attr.mq_msgsize);
 	user = info->user;
 	if (user) {
 		spin_lock(&mq_lock);
@@ -604,8 +604,8 @@
 	/* check for overflow */
 	if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
 		return 0;
-	if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
-	    (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+	if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize
+	    + sizeof (struct msg_msg *))) <
 	    (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
 		return 0;
 	return 1;
@@ -623,9 +623,10 @@
 	int ret;
 
 	if (attr) {
-		ret = -EINVAL;
-		if (!mq_attr_ok(ipc_ns, attr))
+		if (!mq_attr_ok(ipc_ns, attr)) {
+			ret = -EINVAL;
 			goto out;
+		}
 		/* store for use during create */
 		dentry->d_fsdata = attr;
 	}
@@ -659,24 +660,28 @@
 static struct file *do_open(struct ipc_namespace *ipc_ns,
 				struct dentry *dentry, int oflag)
 {
+	int ret;
 	const struct cred *cred = current_cred();
 
 	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
 						  MAY_READ | MAY_WRITE };
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
-		dput(dentry);
-		mntput(ipc_ns->mq_mnt);
-		return ERR_PTR(-EINVAL);
+		ret = -EINVAL;
+		goto err;
 	}
 
 	if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
-		dput(dentry);
-		mntput(ipc_ns->mq_mnt);
-		return ERR_PTR(-EACCES);
+		ret = -EACCES;
+		goto err;
 	}
 
 	return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+
+err:
+	dput(dentry);
+	mntput(ipc_ns->mq_mnt);
+	return ERR_PTR(ret);
 }
 
 SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
@@ -705,16 +710,17 @@
 	dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
-		goto out_err;
+		goto out_putfd;
 	}
 	mntget(ipc_ns->mq_mnt);
 
 	if (oflag & O_CREAT) {
 		if (dentry->d_inode) {	/* entry already exists */
 			audit_inode(name, dentry);
-			error = -EEXIST;
-			if (oflag & O_EXCL)
+			if (oflag & O_EXCL) {
+				error = -EEXIST;
 				goto out;
+			}
 			filp = do_open(ipc_ns, dentry, oflag);
 		} else {
 			filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
@@ -722,9 +728,10 @@
 						u_attr ? &attr : NULL);
 		}
 	} else {
-		error = -ENOENT;
-		if (!dentry->d_inode)
+		if (!dentry->d_inode) {
+			error = -ENOENT;
 			goto out;
+		}
 		audit_inode(name, dentry);
 		filp = do_open(ipc_ns, dentry, oflag);
 	}
@@ -742,7 +749,6 @@
 	mntput(ipc_ns->mq_mnt);
 out_putfd:
 	put_unused_fd(fd);
-out_err:
 	fd = error;
 out_upsem:
 	mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
@@ -872,19 +878,24 @@
 	audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
 	timeout = prepare_timeout(p);
 
-	ret = -EBADF;
 	filp = fget(mqdes);
-	if (unlikely(!filp))
+	if (unlikely(!filp)) {
+		ret = -EBADF;
 		goto out;
+	}
 
 	inode = filp->f_path.dentry->d_inode;
-	if (unlikely(filp->f_op != &mqueue_file_operations))
+	if (unlikely(filp->f_op != &mqueue_file_operations)) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 	info = MQUEUE_I(inode);
 	audit_inode(NULL, filp->f_path.dentry);
 
-	if (unlikely(!(filp->f_mode & FMODE_WRITE)))
+	if (unlikely(!(filp->f_mode & FMODE_WRITE))) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 
 	if (unlikely(msg_len > info->attr.mq_msgsize)) {
 		ret = -EMSGSIZE;
@@ -961,19 +972,24 @@
 	audit_mq_sendrecv(mqdes, msg_len, 0, p);
 	timeout = prepare_timeout(p);
 
-	ret = -EBADF;
 	filp = fget(mqdes);
-	if (unlikely(!filp))
+	if (unlikely(!filp)) {
+		ret = -EBADF;
 		goto out;
+	}
 
 	inode = filp->f_path.dentry->d_inode;
-	if (unlikely(filp->f_op != &mqueue_file_operations))
+	if (unlikely(filp->f_op != &mqueue_file_operations)) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 	info = MQUEUE_I(inode);
 	audit_inode(NULL, filp->f_path.dentry);
 
-	if (unlikely(!(filp->f_mode & FMODE_READ)))
+	if (unlikely(!(filp->f_mode & FMODE_READ))) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 
 	/* checks if buffer is big enough */
 	if (unlikely(msg_len < info->attr.mq_msgsize)) {
@@ -1063,13 +1079,14 @@
 
 			/* create the notify skb */
 			nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
-			ret = -ENOMEM;
-			if (!nc)
+			if (!nc) {
+				ret = -ENOMEM;
 				goto out;
-			ret = -EFAULT;
+			}
 			if (copy_from_user(nc->data,
 					notification.sigev_value.sival_ptr,
 					NOTIFY_COOKIE_LEN)) {
+				ret = -EFAULT;
 				goto out;
 			}
 
@@ -1078,9 +1095,10 @@
 			/* and attach it to the socket */
 retry:
 			filp = fget(notification.sigev_signo);
-			ret = -EBADF;
-			if (!filp)
+			if (!filp) {
+				ret = -EBADF;
 				goto out;
+			}
 			sock = netlink_getsockbyfilp(filp);
 			fput(filp);
 			if (IS_ERR(sock)) {
@@ -1092,7 +1110,7 @@
 			timeo = MAX_SCHEDULE_TIMEOUT;
 			ret = netlink_attachskb(sock, nc, &timeo, NULL);
 			if (ret == 1)
-		       		goto retry;
+				goto retry;
 			if (ret) {
 				sock = NULL;
 				nc = NULL;
@@ -1101,14 +1119,17 @@
 		}
 	}
 
-	ret = -EBADF;
 	filp = fget(mqdes);
-	if (!filp)
+	if (!filp) {
+		ret = -EBADF;
 		goto out;
+	}
 
 	inode = filp->f_path.dentry->d_inode;
-	if (unlikely(filp->f_op != &mqueue_file_operations))
+	if (unlikely(filp->f_op != &mqueue_file_operations)) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 	info = MQUEUE_I(inode);
 
 	ret = 0;
@@ -1171,14 +1192,17 @@
 			return -EINVAL;
 	}
 
-	ret = -EBADF;
 	filp = fget(mqdes);
-	if (!filp)
+	if (!filp) {
+		ret = -EBADF;
 		goto out;
+	}
 
 	inode = filp->f_path.dentry->d_inode;
-	if (unlikely(filp->f_op != &mqueue_file_operations))
+	if (unlikely(filp->f_op != &mqueue_file_operations)) {
+		ret = -EBADF;
 		goto out_fput;
+	}
 	info = MQUEUE_I(inode);
 
 	spin_lock(&info->lock);
@@ -1272,7 +1296,7 @@
 	if (mqueue_inode_cachep == NULL)
 		return -ENOMEM;
 
-	/* ignore failues - they are not fatal */
+	/* ignore failures - they are not fatal */
 	mq_sysctl_table = mq_register_sysctl_table();
 
 	error = register_filesystem(&mqueue_fs_type);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 4b05bd9..028e856 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -548,6 +548,11 @@
 	return 0;
 }
 
+static int compare_root(struct vfsmount *mnt, void *arg)
+{
+	return mnt->mnt_root->d_inode == arg;
+}
+
 void audit_trim_trees(void)
 {
 	struct list_head cursor;
@@ -559,7 +564,6 @@
 		struct path path;
 		struct vfsmount *root_mnt;
 		struct node *node;
-		struct list_head list;
 		int err;
 
 		tree = container_of(cursor.next, struct audit_tree, list);
@@ -577,24 +581,16 @@
 		if (!root_mnt)
 			goto skip_it;
 
-		list_add_tail(&list, &root_mnt->mnt_list);
 		spin_lock(&hash_lock);
 		list_for_each_entry(node, &tree->chunks, list) {
-			struct audit_chunk *chunk = find_chunk(node);
-			struct inode *inode = chunk->watch.inode;
-			struct vfsmount *mnt;
+			struct inode *inode = find_chunk(node)->watch.inode;
 			node->index |= 1U<<31;
-			list_for_each_entry(mnt, &list, mnt_list) {
-				if (mnt->mnt_root->d_inode == inode) {
-					node->index &= ~(1U<<31);
-					break;
-				}
-			}
+			if (iterate_mounts(compare_root, inode, root_mnt))
+				node->index &= ~(1U<<31);
 		}
 		spin_unlock(&hash_lock);
 		trim_marked(tree);
 		put_tree(tree);
-		list_del_init(&list);
 		drop_collected_mounts(root_mnt);
 skip_it:
 		mutex_lock(&audit_filter_mutex);
@@ -603,22 +599,6 @@
 	mutex_unlock(&audit_filter_mutex);
 }
 
-static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-		    struct path *path)
-{
-	if (mnt != path->mnt) {
-		for (;;) {
-			if (mnt->mnt_parent == mnt)
-				return 0;
-			if (mnt->mnt_parent == path->mnt)
-					break;
-			mnt = mnt->mnt_parent;
-		}
-		dentry = mnt->mnt_mountpoint;
-	}
-	return is_subdir(dentry, path->dentry);
-}
-
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
 {
 
@@ -638,13 +618,17 @@
 	put_tree(tree);
 }
 
+static int tag_mount(struct vfsmount *mnt, void *arg)
+{
+	return tag_chunk(mnt->mnt_root->d_inode, arg);
+}
+
 /* called with audit_filter_mutex */
 int audit_add_tree_rule(struct audit_krule *rule)
 {
 	struct audit_tree *seed = rule->tree, *tree;
 	struct path path;
-	struct vfsmount *mnt, *p;
-	struct list_head list;
+	struct vfsmount *mnt;
 	int err;
 
 	list_for_each_entry(tree, &tree_list, list) {
@@ -670,16 +654,9 @@
 		err = -ENOMEM;
 		goto Err;
 	}
-	list_add_tail(&list, &mnt->mnt_list);
 
 	get_tree(tree);
-	list_for_each_entry(p, &list, mnt_list) {
-		err = tag_chunk(p->mnt_root->d_inode, tree);
-		if (err)
-			break;
-	}
-
-	list_del(&list);
+	err = iterate_mounts(tag_mount, tree, mnt);
 	drop_collected_mounts(mnt);
 
 	if (!err) {
@@ -714,31 +691,23 @@
 {
 	struct list_head cursor, barrier;
 	int failed = 0;
-	struct path path;
+	struct path path1, path2;
 	struct vfsmount *tagged;
-	struct list_head list;
-	struct vfsmount *mnt;
-	struct dentry *dentry;
 	int err;
 
-	err = kern_path(new, 0, &path);
+	err = kern_path(new, 0, &path2);
 	if (err)
 		return err;
-	tagged = collect_mounts(&path);
-	path_put(&path);
+	tagged = collect_mounts(&path2);
+	path_put(&path2);
 	if (!tagged)
 		return -ENOMEM;
 
-	err = kern_path(old, 0, &path);
+	err = kern_path(old, 0, &path1);
 	if (err) {
 		drop_collected_mounts(tagged);
 		return err;
 	}
-	mnt = mntget(path.mnt);
-	dentry = dget(path.dentry);
-	path_put(&path);
-
-	list_add_tail(&list, &tagged->mnt_list);
 
 	mutex_lock(&audit_filter_mutex);
 	list_add(&barrier, &tree_list);
@@ -746,7 +715,7 @@
 
 	while (cursor.next != &tree_list) {
 		struct audit_tree *tree;
-		struct vfsmount *p;
+		int good_one = 0;
 
 		tree = container_of(cursor.next, struct audit_tree, list);
 		get_tree(tree);
@@ -754,30 +723,19 @@
 		list_add(&cursor, &tree->list);
 		mutex_unlock(&audit_filter_mutex);
 
-		err = kern_path(tree->pathname, 0, &path);
-		if (err) {
+		err = kern_path(tree->pathname, 0, &path2);
+		if (!err) {
+			good_one = path_is_under(&path1, &path2);
+			path_put(&path2);
+		}
+
+		if (!good_one) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
 			continue;
 		}
 
-		spin_lock(&vfsmount_lock);
-		if (!is_under(mnt, dentry, &path)) {
-			spin_unlock(&vfsmount_lock);
-			path_put(&path);
-			put_tree(tree);
-			mutex_lock(&audit_filter_mutex);
-			continue;
-		}
-		spin_unlock(&vfsmount_lock);
-		path_put(&path);
-
-		list_for_each_entry(p, &list, mnt_list) {
-			failed = tag_chunk(p->mnt_root->d_inode, tree);
-			if (failed)
-				break;
-		}
-
+		failed = iterate_mounts(tag_mount, tree, tagged);
 		if (failed) {
 			put_tree(tree);
 			mutex_lock(&audit_filter_mutex);
@@ -818,10 +776,8 @@
 	}
 	list_del(&barrier);
 	list_del(&cursor);
-	list_del(&list);
 	mutex_unlock(&audit_filter_mutex);
-	dput(dentry);
-	mntput(mnt);
+	path_put(&path1);
 	drop_collected_mounts(tagged);
 	return failed;
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc0f928..f3a461c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1988,7 +1988,6 @@
 
 /**
  * audit_inode_child - collect inode info for created/removed objects
- * @dname: inode's dentry name
  * @dentry: dentry being audited
  * @parent: inode of dentry parent
  *
@@ -2000,13 +1999,14 @@
  * must be hooked prior, in order to capture the target inode during
  * unsuccessful attempts.
  */
-void __audit_inode_child(const char *dname, const struct dentry *dentry,
+void __audit_inode_child(const struct dentry *dentry,
 			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
 	const char *found_parent = NULL, *found_child = NULL;
 	const struct inode *inode = dentry->d_inode;
+	const char *dname = dentry->d_name.name;
 	int dirlen = 0;
 
 	if (!context->in_syscall)
@@ -2014,9 +2014,6 @@
 
 	if (inode)
 		handle_one(inode);
-	/* determine matching parent */
-	if (!dname)
-		goto add_names;
 
 	/* parent is more likely, look for it first */
 	for (idx = 0; idx < context->name_count; idx++) {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ccec774..fa034d2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -42,9 +42,11 @@
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/sysctl.h>
 #include <linux/kdebug.h>
 #include <linux/memory.h>
 #include <linux/ftrace.h>
+#include <linux/cpu.h>
 
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
@@ -105,57 +107,74 @@
  * stepping on the instruction on a vmalloced/kmalloced/data page
  * is a recipe for disaster
  */
-#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
-
 struct kprobe_insn_page {
 	struct list_head list;
 	kprobe_opcode_t *insns;		/* Page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
 	int nused;
 	int ngarbage;
+	char slot_used[];
 };
 
+#define KPROBE_INSN_PAGE_SIZE(slots)			\
+	(offsetof(struct kprobe_insn_page, slot_used) +	\
+	 (sizeof(char) * (slots)))
+
+struct kprobe_insn_cache {
+	struct list_head pages;	/* list of kprobe_insn_page */
+	size_t insn_size;	/* size of instruction slot */
+	int nr_garbage;
+};
+
+static int slots_per_page(struct kprobe_insn_cache *c)
+{
+	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
+}
+
 enum kprobe_slot_state {
 	SLOT_CLEAN = 0,
 	SLOT_DIRTY = 1,
 	SLOT_USED = 2,
 };
 
-static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
-static LIST_HEAD(kprobe_insn_pages);
-static int kprobe_garbage_slots;
-static int collect_garbage_slots(void);
+static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_slots */
+static struct kprobe_insn_cache kprobe_insn_slots = {
+	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
+	.insn_size = MAX_INSN_SIZE,
+	.nr_garbage = 0,
+};
+static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
 
 /**
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-static kprobe_opcode_t __kprobes *__get_insn_slot(void)
+static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip;
 
  retry:
-	list_for_each_entry(kip, &kprobe_insn_pages, list) {
-		if (kip->nused < INSNS_PER_PAGE) {
+	list_for_each_entry(kip, &c->pages, list) {
+		if (kip->nused < slots_per_page(c)) {
 			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
+			for (i = 0; i < slots_per_page(c); i++) {
 				if (kip->slot_used[i] == SLOT_CLEAN) {
 					kip->slot_used[i] = SLOT_USED;
 					kip->nused++;
-					return kip->insns + (i * MAX_INSN_SIZE);
+					return kip->insns + (i * c->insn_size);
 				}
 			}
-			/* Surprise!  No unused slots.  Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
+			/* kip->nused is broken. Fix it. */
+			kip->nused = slots_per_page(c);
+			WARN_ON(1);
 		}
 	}
 
 	/* If there are any garbage slots, collect it and try again. */
-	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
+	if (c->nr_garbage && collect_garbage_slots(c) == 0)
 		goto retry;
-	}
-	/* All out of space.  Need to allocate a new page. Use slot 0. */
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
+
+	/* All out of space.  Need to allocate a new page. */
+	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
 	if (!kip)
 		return NULL;
 
@@ -170,20 +189,23 @@
 		return NULL;
 	}
 	INIT_LIST_HEAD(&kip->list);
-	list_add(&kip->list, &kprobe_insn_pages);
-	memset(kip->slot_used, SLOT_CLEAN, INSNS_PER_PAGE);
+	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
 	kip->slot_used[0] = SLOT_USED;
 	kip->nused = 1;
 	kip->ngarbage = 0;
+	list_add(&kip->list, &c->pages);
 	return kip->insns;
 }
 
+
 kprobe_opcode_t __kprobes *get_insn_slot(void)
 {
-	kprobe_opcode_t *ret;
+	kprobe_opcode_t *ret = NULL;
+
 	mutex_lock(&kprobe_insn_mutex);
-	ret = __get_insn_slot();
+	ret = __get_insn_slot(&kprobe_insn_slots);
 	mutex_unlock(&kprobe_insn_mutex);
+
 	return ret;
 }
 
@@ -199,7 +221,7 @@
 		 * so as not to have to set it up again the
 		 * next time somebody inserts a probe.
 		 */
-		if (!list_is_singular(&kprobe_insn_pages)) {
+		if (!list_is_singular(&kip->list)) {
 			list_del(&kip->list);
 			module_free(NULL, kip->insns);
 			kfree(kip);
@@ -209,51 +231,84 @@
 	return 0;
 }
 
-static int __kprobes collect_garbage_slots(void)
+static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
 {
 	struct kprobe_insn_page *kip, *next;
 
 	/* Ensure no-one is interrupted on the garbages */
 	synchronize_sched();
 
-	list_for_each_entry_safe(kip, next, &kprobe_insn_pages, list) {
+	list_for_each_entry_safe(kip, next, &c->pages, list) {
 		int i;
 		if (kip->ngarbage == 0)
 			continue;
 		kip->ngarbage = 0;	/* we will collect all garbages */
-		for (i = 0; i < INSNS_PER_PAGE; i++) {
+		for (i = 0; i < slots_per_page(c); i++) {
 			if (kip->slot_used[i] == SLOT_DIRTY &&
 			    collect_one_slot(kip, i))
 				break;
 		}
 	}
-	kprobe_garbage_slots = 0;
+	c->nr_garbage = 0;
 	return 0;
 }
 
+static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
+				       kprobe_opcode_t *slot, int dirty)
+{
+	struct kprobe_insn_page *kip;
+
+	list_for_each_entry(kip, &c->pages, list) {
+		long idx = ((long)slot - (long)kip->insns) / c->insn_size;
+		if (idx >= 0 && idx < slots_per_page(c)) {
+			WARN_ON(kip->slot_used[idx] != SLOT_USED);
+			if (dirty) {
+				kip->slot_used[idx] = SLOT_DIRTY;
+				kip->ngarbage++;
+				if (++c->nr_garbage > slots_per_page(c))
+					collect_garbage_slots(c);
+			} else
+				collect_one_slot(kip, idx);
+			return;
+		}
+	}
+	/* Could not free this slot. */
+	WARN_ON(1);
+}
+
 void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 {
-	struct kprobe_insn_page *kip;
-
 	mutex_lock(&kprobe_insn_mutex);
-	list_for_each_entry(kip, &kprobe_insn_pages, list) {
-		if (kip->insns <= slot &&
-		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			if (dirty) {
-				kip->slot_used[i] = SLOT_DIRTY;
-				kip->ngarbage++;
-			} else
-				collect_one_slot(kip, i);
-			break;
-		}
-	}
-
-	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
-		collect_garbage_slots();
-
+	__free_insn_slot(&kprobe_insn_slots, slot, dirty);
 	mutex_unlock(&kprobe_insn_mutex);
 }
+#ifdef CONFIG_OPTPROBES
+/* For optimized_kprobe buffer */
+static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
+static struct kprobe_insn_cache kprobe_optinsn_slots = {
+	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
+	/* .insn_size is initialized later */
+	.nr_garbage = 0,
+};
+/* Get a slot for optimized_kprobe buffer */
+kprobe_opcode_t __kprobes *get_optinsn_slot(void)
+{
+	kprobe_opcode_t *ret = NULL;
+
+	mutex_lock(&kprobe_optinsn_mutex);
+	ret = __get_insn_slot(&kprobe_optinsn_slots);
+	mutex_unlock(&kprobe_optinsn_mutex);
+
+	return ret;
+}
+
+void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
+{
+	mutex_lock(&kprobe_optinsn_mutex);
+	__free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
+	mutex_unlock(&kprobe_optinsn_mutex);
+}
+#endif
 #endif
 
 /* We have preemption disabled.. so it is safe to use __ versions */
@@ -284,23 +339,401 @@
 		if (p->addr == addr)
 			return p;
 	}
+
 	return NULL;
 }
 
+static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+/* Return true if the kprobe is an aggregator */
+static inline int kprobe_aggrprobe(struct kprobe *p)
+{
+	return p->pre_handler == aggr_pre_handler;
+}
+
+/*
+ * Keep all fields in the kprobe consistent
+ */
+static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
+	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+}
+
+#ifdef CONFIG_OPTPROBES
+/* NOTE: change this value only with kprobe_mutex held */
+static bool kprobes_allow_optimization;
+
+/*
+ * Call all pre_handler on the list, but ignores its return value.
+ * This must be called from arch-dep optimized caller.
+ */
+void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &p->list, list) {
+		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
+			set_kprobe_instance(kp);
+			kp->pre_handler(kp, regs);
+		}
+		reset_kprobe_instance();
+	}
+}
+
+/* Return true(!0) if the kprobe is ready for optimization. */
+static inline int kprobe_optready(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	if (kprobe_aggrprobe(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		return arch_prepared_optinsn(&op->optinsn);
+	}
+
+	return 0;
+}
+
+/*
+ * Return an optimized kprobe whose optimizing code replaces
+ * instructions including addr (exclude breakpoint).
+ */
+struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
+{
+	int i;
+	struct kprobe *p = NULL;
+	struct optimized_kprobe *op;
+
+	/* Don't check i == 0, since that is a breakpoint case. */
+	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
+		p = get_kprobe((void *)(addr - i));
+
+	if (p && kprobe_optready(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		if (arch_within_optimized_kprobe(op, addr))
+			return p;
+	}
+
+	return NULL;
+}
+
+/* Optimization staging list, protected by kprobe_mutex */
+static LIST_HEAD(optimizing_list);
+
+static void kprobe_optimizer(struct work_struct *work);
+static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+#define OPTIMIZE_DELAY 5
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+	struct optimized_kprobe *op, *tmp;
+
+	/* Lock modules while optimizing kprobes */
+	mutex_lock(&module_mutex);
+	mutex_lock(&kprobe_mutex);
+	if (kprobes_all_disarmed || !kprobes_allow_optimization)
+		goto end;
+
+	/*
+	 * Wait for quiesence period to ensure all running interrupts
+	 * are done. Because optprobe may modify multiple instructions
+	 * there is a chance that Nth instruction is interrupted. In that
+	 * case, running interrupt can return to 2nd-Nth byte of jump
+	 * instruction. This wait is for avoiding it.
+	 */
+	synchronize_sched();
+
+	/*
+	 * The optimization/unoptimization refers online_cpus via
+	 * stop_machine() and cpu-hotplug modifies online_cpus.
+	 * And same time, text_mutex will be held in cpu-hotplug and here.
+	 * This combination can cause a deadlock (cpu-hotplug try to lock
+	 * text_mutex but stop_machine can not be done because online_cpus
+	 * has been changed)
+	 * To avoid this deadlock, we need to call get_online_cpus()
+	 * for preventing cpu-hotplug outside of text_mutex locking.
+	 */
+	get_online_cpus();
+	mutex_lock(&text_mutex);
+	list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
+		WARN_ON(kprobe_disabled(&op->kp));
+		if (arch_optimize_kprobe(op) < 0)
+			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+		list_del_init(&op->list);
+	}
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+end:
+	mutex_unlock(&kprobe_mutex);
+	mutex_unlock(&module_mutex);
+}
+
+/* Optimize kprobe if p is ready to be optimized */
+static __kprobes void optimize_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	/* Check if the kprobe is disabled or not ready for optimization. */
+	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
+	    (kprobe_disabled(p) || kprobes_all_disarmed))
+		return;
+
+	/* Both of break_handler and post_handler are not supported. */
+	if (p->break_handler || p->post_handler)
+		return;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+
+	/* Check there is no other kprobes at the optimized instructions */
+	if (arch_check_optimized_kprobe(op) < 0)
+		return;
+
+	/* Check if it is already optimized. */
+	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
+		return;
+
+	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
+	list_add(&op->list, &optimizing_list);
+	if (!delayed_work_pending(&optimizing_work))
+		schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Unoptimize a kprobe if p is optimized */
+static __kprobes void unoptimize_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
+		op = container_of(p, struct optimized_kprobe, kp);
+		if (!list_empty(&op->list))
+			/* Dequeue from the optimization queue */
+			list_del_init(&op->list);
+		else
+			/* Replace jump with break */
+			arch_unoptimize_kprobe(op);
+		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	}
+}
+
+/* Remove optimized instructions */
+static void __kprobes kill_optimized_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	if (!list_empty(&op->list)) {
+		/* Dequeue from the optimization queue */
+		list_del_init(&op->list);
+		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+	}
+	/* Don't unoptimize, because the target code will be freed. */
+	arch_remove_optimized_kprobe(op);
+}
+
+/* Try to prepare optimized instructions */
+static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	arch_prepare_optimized_kprobe(op);
+}
+
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = container_of(p, struct optimized_kprobe, kp);
+	arch_remove_optimized_kprobe(op);
+	kfree(op);
+}
+
+/* Allocate new optimized_kprobe and try to prepare optimized instructions */
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+	struct optimized_kprobe *op;
+
+	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
+	if (!op)
+		return NULL;
+
+	INIT_LIST_HEAD(&op->list);
+	op->kp.addr = p->addr;
+	arch_prepare_optimized_kprobe(op);
+
+	return &op->kp;
+}
+
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
+
+/*
+ * Prepare an optimized_kprobe and optimize it
+ * NOTE: p must be a normal registered kprobe
+ */
+static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
+{
+	struct kprobe *ap;
+	struct optimized_kprobe *op;
+
+	ap = alloc_aggr_kprobe(p);
+	if (!ap)
+		return;
+
+	op = container_of(ap, struct optimized_kprobe, kp);
+	if (!arch_prepared_optinsn(&op->optinsn)) {
+		/* If failed to setup optimizing, fallback to kprobe */
+		free_aggr_kprobe(ap);
+		return;
+	}
+
+	init_aggr_kprobe(ap, p);
+	optimize_kprobe(ap);
+}
+
+#ifdef CONFIG_SYSCTL
+static void __kprobes optimize_all_kprobes(void)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+
+	/* If optimization is already allowed, just return */
+	if (kprobes_allow_optimization)
+		return;
+
+	kprobes_allow_optimization = true;
+	mutex_lock(&text_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist)
+			if (!kprobe_disabled(p))
+				optimize_kprobe(p);
+	}
+	mutex_unlock(&text_mutex);
+	printk(KERN_INFO "Kprobes globally optimized\n");
+}
+
+static void __kprobes unoptimize_all_kprobes(void)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+
+	/* If optimization is already prohibited, just return */
+	if (!kprobes_allow_optimization)
+		return;
+
+	kprobes_allow_optimization = false;
+	printk(KERN_INFO "Kprobes globally unoptimized\n");
+	get_online_cpus();	/* For avoiding text_mutex deadlock */
+	mutex_lock(&text_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist) {
+			if (!kprobe_disabled(p))
+				unoptimize_kprobe(p);
+		}
+	}
+
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
+	/* Allow all currently running kprobes to complete */
+	synchronize_sched();
+}
+
+int sysctl_kprobes_optimization;
+int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
+				      void __user *buffer, size_t *length,
+				      loff_t *ppos)
+{
+	int ret;
+
+	mutex_lock(&kprobe_mutex);
+	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+	if (sysctl_kprobes_optimization)
+		optimize_all_kprobes();
+	else
+		unoptimize_all_kprobes();
+	mutex_unlock(&kprobe_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_SYSCTL */
+
+static void __kprobes __arm_kprobe(struct kprobe *p)
+{
+	struct kprobe *old_p;
+
+	/* Check collision with other optimized kprobes */
+	old_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(old_p))
+		unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+
+	arch_arm_kprobe(p);
+	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
+}
+
+static void __kprobes __disarm_kprobe(struct kprobe *p)
+{
+	struct kprobe *old_p;
+
+	unoptimize_kprobe(p);	/* Try to unoptimize */
+	arch_disarm_kprobe(p);
+
+	/* If another kprobe was blocked, optimize it. */
+	old_p = get_optimized_kprobe((unsigned long)p->addr);
+	if (unlikely(old_p))
+		optimize_kprobe(old_p);
+}
+
+#else /* !CONFIG_OPTPROBES */
+
+#define optimize_kprobe(p)			do {} while (0)
+#define unoptimize_kprobe(p)			do {} while (0)
+#define kill_optimized_kprobe(p)		do {} while (0)
+#define prepare_optimized_kprobe(p)		do {} while (0)
+#define try_to_optimize_kprobe(p)		do {} while (0)
+#define __arm_kprobe(p)				arch_arm_kprobe(p)
+#define __disarm_kprobe(p)			arch_disarm_kprobe(p)
+
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+	kfree(p);
+}
+
+static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
+{
+	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+}
+#endif /* CONFIG_OPTPROBES */
+
 /* Arm a kprobe with text_mutex */
 static void __kprobes arm_kprobe(struct kprobe *kp)
 {
+	/*
+	 * Here, since __arm_kprobe() doesn't use stop_machine(),
+	 * this doesn't cause deadlock on text_mutex. So, we don't
+	 * need get_online_cpus().
+	 */
 	mutex_lock(&text_mutex);
-	arch_arm_kprobe(kp);
+	__arm_kprobe(kp);
 	mutex_unlock(&text_mutex);
 }
 
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
+	get_online_cpus();	/* For avoiding text_mutex deadlock */
 	mutex_lock(&text_mutex);
-	arch_disarm_kprobe(kp);
+	__disarm_kprobe(kp);
 	mutex_unlock(&text_mutex);
+	put_online_cpus();
 }
 
 /*
@@ -369,7 +802,7 @@
 void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
 {
 	struct kprobe *kp;
-	if (p->pre_handler != aggr_pre_handler) {
+	if (!kprobe_aggrprobe(p)) {
 		p->nmissed++;
 	} else {
 		list_for_each_entry_rcu(kp, &p->list, list)
@@ -493,21 +926,16 @@
 }
 
 /*
- * Keep all fields in the kprobe consistent
- */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
-{
-	memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-	memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
-}
-
-/*
 * Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
 static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
+
+	if (p->break_handler || p->post_handler)
+		unoptimize_kprobe(ap);	/* Fall back to normal kprobe */
+
 	if (p->break_handler) {
 		if (ap->break_handler)
 			return -EEXIST;
@@ -522,7 +950,7 @@
 		ap->flags &= ~KPROBE_FLAG_DISABLED;
 		if (!kprobes_all_disarmed)
 			/* Arm the breakpoint again. */
-			arm_kprobe(ap);
+			__arm_kprobe(ap);
 	}
 	return 0;
 }
@@ -531,12 +959,13 @@
  * Fill in the required fields of the "manager kprobe". Replace the
  * earlier kprobe in the hlist with the manager kprobe
  */
-static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
+	/* Copy p's insn slot to ap */
 	copy_kprobe(p, ap);
 	flush_insn_slot(ap);
 	ap->addr = p->addr;
-	ap->flags = p->flags;
+	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
 	/* We don't care the kprobe which has gone. */
@@ -546,8 +975,9 @@
 		ap->break_handler = aggr_break_handler;
 
 	INIT_LIST_HEAD(&ap->list);
-	list_add_rcu(&p->list, &ap->list);
+	INIT_HLIST_NODE(&ap->hlist);
 
+	list_add_rcu(&p->list, &ap->list);
 	hlist_replace_rcu(&p->hlist, &ap->hlist);
 }
 
@@ -561,12 +991,12 @@
 	int ret = 0;
 	struct kprobe *ap = old_p;
 
-	if (old_p->pre_handler != aggr_pre_handler) {
-		/* If old_p is not an aggr_probe, create new aggr_kprobe. */
-		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+	if (!kprobe_aggrprobe(old_p)) {
+		/* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
+		ap = alloc_aggr_kprobe(old_p);
 		if (!ap)
 			return -ENOMEM;
-		add_aggr_kprobe(ap, old_p);
+		init_aggr_kprobe(ap, old_p);
 	}
 
 	if (kprobe_gone(ap)) {
@@ -585,6 +1015,9 @@
 			 */
 			return ret;
 
+		/* Prepare optimized instructions if possible. */
+		prepare_optimized_kprobe(ap);
+
 		/*
 		 * Clear gone flag to prevent allocating new slot again, and
 		 * set disabled flag because it is not armed yet.
@@ -593,6 +1026,7 @@
 			    | KPROBE_FLAG_DISABLED;
 	}
 
+	/* Copy ap's insn slot to p */
 	copy_kprobe(ap, p);
 	return add_new_kprobe(ap, p);
 }
@@ -743,27 +1177,34 @@
 	p->nmissed = 0;
 	INIT_LIST_HEAD(&p->list);
 	mutex_lock(&kprobe_mutex);
+
+	get_online_cpus();	/* For avoiding text_mutex deadlock. */
+	mutex_lock(&text_mutex);
+
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
+		/* Since this may unoptimize old_p, locking text_mutex. */
 		ret = register_aggr_kprobe(old_p, p);
 		goto out;
 	}
 
-	mutex_lock(&text_mutex);
 	ret = arch_prepare_kprobe(p);
 	if (ret)
-		goto out_unlock_text;
+		goto out;
 
 	INIT_HLIST_NODE(&p->hlist);
 	hlist_add_head_rcu(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
 	if (!kprobes_all_disarmed && !kprobe_disabled(p))
-		arch_arm_kprobe(p);
+		__arm_kprobe(p);
 
-out_unlock_text:
-	mutex_unlock(&text_mutex);
+	/* Try to optimize kprobe */
+	try_to_optimize_kprobe(p);
+
 out:
+	mutex_unlock(&text_mutex);
+	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
 
 	if (probed_mod)
@@ -785,7 +1226,7 @@
 		return -EINVAL;
 
 	if (old_p == p ||
-	    (old_p->pre_handler == aggr_pre_handler &&
+	    (kprobe_aggrprobe(old_p) &&
 	     list_is_singular(&old_p->list))) {
 		/*
 		 * Only probe on the hash list. Disarm only if kprobes are
@@ -793,7 +1234,7 @@
 		 * already have been removed. We save on flushing icache.
 		 */
 		if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-			disarm_kprobe(p);
+			disarm_kprobe(old_p);
 		hlist_del_rcu(&old_p->hlist);
 	} else {
 		if (p->break_handler && !kprobe_gone(p))
@@ -809,8 +1250,13 @@
 		list_del_rcu(&p->list);
 		if (!kprobe_disabled(old_p)) {
 			try_to_disable_aggr_kprobe(old_p);
-			if (!kprobes_all_disarmed && kprobe_disabled(old_p))
-				disarm_kprobe(old_p);
+			if (!kprobes_all_disarmed) {
+				if (kprobe_disabled(old_p))
+					disarm_kprobe(old_p);
+				else
+					/* Try to optimize this probe again */
+					optimize_kprobe(old_p);
+			}
 		}
 	}
 	return 0;
@@ -827,7 +1273,7 @@
 		old_p = list_entry(p->list.next, struct kprobe, list);
 		list_del(&p->list);
 		arch_remove_kprobe(old_p);
-		kfree(old_p);
+		free_aggr_kprobe(old_p);
 	}
 }
 
@@ -1123,7 +1569,7 @@
 	struct kprobe *kp;
 
 	p->flags |= KPROBE_FLAG_GONE;
-	if (p->pre_handler == aggr_pre_handler) {
+	if (kprobe_aggrprobe(p)) {
 		/*
 		 * If this is an aggr_kprobe, we have to list all the
 		 * chained probes and mark them GONE.
@@ -1132,6 +1578,7 @@
 			kp->flags |= KPROBE_FLAG_GONE;
 		p->post_handler = NULL;
 		p->break_handler = NULL;
+		kill_optimized_kprobe(p);
 	}
 	/*
 	 * Here, we can remove insn_slot safely, because no thread calls
@@ -1241,6 +1688,15 @@
 		}
 	}
 
+#if defined(CONFIG_OPTPROBES)
+#if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
+	/* Init kprobe_optinsn_slots */
+	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
+#endif
+	/* By default, kprobes can be optimized */
+	kprobes_allow_optimization = true;
+#endif
+
 	/* By default, kprobes are armed */
 	kprobes_all_disarmed = false;
 
@@ -1259,7 +1715,7 @@
 
 #ifdef CONFIG_DEBUG_FS
 static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
-		const char *sym, int offset,char *modname)
+		const char *sym, int offset, char *modname, struct kprobe *pp)
 {
 	char *kprobe_type;
 
@@ -1269,19 +1725,21 @@
 		kprobe_type = "j";
 	else
 		kprobe_type = "k";
+
 	if (sym)
-		seq_printf(pi, "%p  %s  %s+0x%x  %s %s%s\n",
+		seq_printf(pi, "%p  %s  %s+0x%x  %s ",
 			p->addr, kprobe_type, sym, offset,
-			(modname ? modname : " "),
-			(kprobe_gone(p) ? "[GONE]" : ""),
-			((kprobe_disabled(p) && !kprobe_gone(p)) ?
-			 "[DISABLED]" : ""));
+			(modname ? modname : " "));
 	else
-		seq_printf(pi, "%p  %s  %p %s%s\n",
-			p->addr, kprobe_type, p->addr,
-			(kprobe_gone(p) ? "[GONE]" : ""),
-			((kprobe_disabled(p) && !kprobe_gone(p)) ?
-			 "[DISABLED]" : ""));
+		seq_printf(pi, "%p  %s  %p ",
+			p->addr, kprobe_type, p->addr);
+
+	if (!pp)
+		pp = p;
+	seq_printf(pi, "%s%s%s\n",
+		(kprobe_gone(p) ? "[GONE]" : ""),
+		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
+		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1317,11 +1775,11 @@
 	hlist_for_each_entry_rcu(p, node, head, hlist) {
 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
 					&offset, &modname, namebuf);
-		if (p->pre_handler == aggr_pre_handler) {
+		if (kprobe_aggrprobe(p)) {
 			list_for_each_entry_rcu(kp, &p->list, list)
-				report_probe(pi, kp, sym, offset, modname);
+				report_probe(pi, kp, sym, offset, modname, p);
 		} else
-			report_probe(pi, p, sym, offset, modname);
+			report_probe(pi, p, sym, offset, modname, NULL);
 	}
 	preempt_enable();
 	return 0;
@@ -1399,12 +1857,13 @@
 		goto out;
 	}
 
-	if (!kprobes_all_disarmed && kprobe_disabled(p))
-		arm_kprobe(p);
-
-	p->flags &= ~KPROBE_FLAG_DISABLED;
 	if (p != kp)
 		kp->flags &= ~KPROBE_FLAG_DISABLED;
+
+	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
+		p->flags &= ~KPROBE_FLAG_DISABLED;
+		arm_kprobe(p);
+	}
 out:
 	mutex_unlock(&kprobe_mutex);
 	return ret;
@@ -1424,12 +1883,13 @@
 	if (!kprobes_all_disarmed)
 		goto already_enabled;
 
+	/* Arming kprobes doesn't optimize kprobe itself */
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
 			if (!kprobe_disabled(p))
-				arch_arm_kprobe(p);
+				__arm_kprobe(p);
 	}
 	mutex_unlock(&text_mutex);
 
@@ -1456,16 +1916,23 @@
 
 	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");
+
+	/*
+	 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
+	 * because disarming may also unoptimize kprobes.
+	 */
+	get_online_cpus();
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-				arch_disarm_kprobe(p);
+				__disarm_kprobe(p);
 		}
 	}
 
 	mutex_unlock(&text_mutex);
+	put_online_cpus();
 	mutex_unlock(&kprobe_mutex);
 	/* Allow all currently running kprobes to complete */
 	synchronize_sched();
diff --git a/kernel/padata.c b/kernel/padata.c
index 6f9bcb8..93caf65 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -642,6 +642,9 @@
 	if (!pd)
 		goto err_free_inst;
 
+	if (!alloc_cpumask_var(&pinst->cpumask, GFP_KERNEL))
+		goto err_free_pd;
+
 	rcu_assign_pointer(pinst->pd, pd);
 
 	pinst->wq = wq;
@@ -654,12 +657,14 @@
 	pinst->cpu_notifier.priority = 0;
 	err = register_hotcpu_notifier(&pinst->cpu_notifier);
 	if (err)
-		goto err_free_pd;
+		goto err_free_cpumask;
 
 	mutex_init(&pinst->lock);
 
 	return pinst;
 
+err_free_cpumask:
+	free_cpumask_var(pinst->cpumask);
 err_free_pd:
 	padata_free_pd(pd);
 err_free_inst:
@@ -685,6 +690,7 @@
 
 	unregister_hotcpu_notifier(&pinst->cpu_notifier);
 	padata_free_pd(pinst->pd);
+	free_cpumask_var(pinst->cpumask);
 	kfree(pinst);
 }
 EXPORT_SYMBOL(padata_free);
diff --git a/kernel/signal.c b/kernel/signal.c
index 934ae5e..5bb9baf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -159,6 +159,10 @@
 
 /* Given the mask, find the first available signal that should be serviced. */
 
+#define SYNCHRONOUS_MASK \
+	(sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \
+	 sigmask(SIGTRAP) | sigmask(SIGFPE))
+
 int next_signal(struct sigpending *pending, sigset_t *mask)
 {
 	unsigned long i, *s, *m, x;
@@ -166,26 +170,39 @@
 
 	s = pending->signal.sig;
 	m = mask->sig;
+
+	/*
+	 * Handle the first word specially: it contains the
+	 * synchronous signals that need to be dequeued first.
+	 */
+	x = *s &~ *m;
+	if (x) {
+		if (x & SYNCHRONOUS_MASK)
+			x &= SYNCHRONOUS_MASK;
+		sig = ffz(~x) + 1;
+		return sig;
+	}
+
 	switch (_NSIG_WORDS) {
 	default:
-		for (i = 0; i < _NSIG_WORDS; ++i, ++s, ++m)
-			if ((x = *s &~ *m) != 0) {
-				sig = ffz(~x) + i*_NSIG_BPW + 1;
-				break;
-			}
-		break;
-
-	case 2: if ((x = s[0] &~ m[0]) != 0)
-			sig = 1;
-		else if ((x = s[1] &~ m[1]) != 0)
-			sig = _NSIG_BPW + 1;
-		else
+		for (i = 1; i < _NSIG_WORDS; ++i) {
+			x = *++s &~ *++m;
+			if (!x)
+				continue;
+			sig = ffz(~x) + i*_NSIG_BPW + 1;
 			break;
-		sig += ffz(~x);
+		}
 		break;
 
-	case 1: if ((x = *s &~ *m) != 0)
-			sig = ffz(~x) + 1;
+	case 2:
+		x = s[1] &~ m[1];
+		if (!x)
+			break;
+		sig = ffz(~x) + _NSIG_BPW + 1;
+		break;
+
+	case 1:
+		/* Nothing to do */
 		break;
 	}
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 33e7a38..0ef19c6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -50,6 +50,7 @@
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
 #include <linux/perf_event.h>
+#include <linux/kprobes.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -1450,6 +1451,17 @@
 		.proc_handler	= proc_dointvec
 	},
 #endif
+#if defined(CONFIG_OPTPROBES)
+	{
+		.procname	= "kprobes-optimization",
+		.data		= &sysctl_kprobes_optimization,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_kprobes_optimization_handler,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 	{ }
 };
 
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 8f5d16e..8cd50d8 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1331,7 +1331,7 @@
 	ssize_t result;
 	char *pathname;
 	int flags;
-	int acc_mode, fmode;
+	int acc_mode;
 
 	pathname = sysctl_getname(name, nlen, &table);
 	result = PTR_ERR(pathname);
@@ -1342,15 +1342,12 @@
 	if (oldval && oldlen && newval && newlen) {
 		flags = O_RDWR;
 		acc_mode = MAY_READ | MAY_WRITE;
-		fmode = FMODE_READ | FMODE_WRITE;
 	} else if (newval && newlen) {
 		flags = O_WRONLY;
 		acc_mode = MAY_WRITE;
-		fmode = FMODE_WRITE;
 	} else if (oldval && oldlen) {
 		flags = O_RDONLY;
 		acc_mode = MAY_READ;
-		fmode = FMODE_READ;
 	} else {
 		result = 0;
 		goto out_putname;
@@ -1361,7 +1358,7 @@
 	if (result)
 		goto out_putname;
 
-	result = may_open(&nd.path, acc_mode, fmode);
+	result = may_open(&nd.path, acc_mode, flags);
 	if (result)
 		goto out_putpath;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 698ea80..148b52a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1117,7 +1117,7 @@
 			if (!PageUptodate(page)) {
 				if (page->mapping == NULL) {
 					/*
-					 * invalidate_inode_pages got it
+					 * invalidate_mapping_pages got it
 					 */
 					unlock_page(page);
 					page_cache_release(page);
diff --git a/net/9p/client.c b/net/9p/client.c
index 09d4f1e..bde9f3d 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -46,6 +46,7 @@
 	Opt_msize,
 	Opt_trans,
 	Opt_legacy,
+	Opt_version,
 	Opt_err,
 };
 
@@ -53,9 +54,42 @@
 	{Opt_msize, "msize=%u"},
 	{Opt_legacy, "noextend"},
 	{Opt_trans, "trans=%s"},
+	{Opt_version, "version=%s"},
 	{Opt_err, NULL},
 };
 
+inline int p9_is_proto_dotl(struct p9_client *clnt)
+{
+	return (clnt->proto_version == p9_proto_2010L);
+}
+EXPORT_SYMBOL(p9_is_proto_dotl);
+
+inline int p9_is_proto_dotu(struct p9_client *clnt)
+{
+	return (clnt->proto_version == p9_proto_2000u);
+}
+EXPORT_SYMBOL(p9_is_proto_dotu);
+
+/* Interpret mount option for protocol version */
+static unsigned char get_protocol_version(const substring_t *name)
+{
+	unsigned char version = -EINVAL;
+	if (!strncmp("9p2000", name->from, name->to-name->from)) {
+		version = p9_proto_legacy;
+		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n");
+	} else if (!strncmp("9p2000.u", name->from, name->to-name->from)) {
+		version = p9_proto_2000u;
+		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n");
+	} else if (!strncmp("9p2010.L", name->from, name->to-name->from)) {
+		version = p9_proto_2010L;
+		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n");
+	} else {
+		P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ",
+							name->from);
+	}
+	return version;
+}
+
 static struct p9_req_t *
 p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
 
@@ -75,7 +109,7 @@
 	int option;
 	int ret = 0;
 
-	clnt->dotu = 1;
+	clnt->proto_version = p9_proto_2000u;
 	clnt->msize = 8192;
 
 	if (!opts)
@@ -118,7 +152,13 @@
 			}
 			break;
 		case Opt_legacy:
-			clnt->dotu = 0;
+			clnt->proto_version = p9_proto_legacy;
+			break;
+		case Opt_version:
+			ret = get_protocol_version(&args[0]);
+			if (ret == -EINVAL)
+				goto free_and_return;
+			clnt->proto_version = ret;
 			break;
 		default:
 			continue;
@@ -410,14 +450,15 @@
 		int ecode;
 		char *ename;
 
-		err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode);
+		err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+							&ename, &ecode);
 		if (err) {
 			P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n",
 									err);
 			return err;
 		}
 
-		if (c->dotu)
+		if (p9_is_proto_dotu(c))
 			err = -ecode;
 
 		if (!err || !IS_ERR_VALUE(err))
@@ -515,7 +556,7 @@
 	/* marshall the data */
 	p9pdu_prepare(req->tc, tag, type);
 	va_start(ap, fmt);
-	err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap);
+	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
 	va_end(ap);
 	p9pdu_finalize(req->tc);
 
@@ -627,14 +668,31 @@
 	char *version;
 	int msize;
 
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n",
-							c->msize, c->dotu);
-	req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize,
-				c->dotu ? "9P2000.u" : "9P2000");
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n",
+						c->msize, c->proto_version);
+
+	switch (c->proto_version) {
+	case p9_proto_2010L:
+		req = p9_client_rpc(c, P9_TVERSION, "ds",
+					c->msize, "9P2010.L");
+		break;
+	case p9_proto_2000u:
+		req = p9_client_rpc(c, P9_TVERSION, "ds",
+					c->msize, "9P2000.u");
+		break;
+	case p9_proto_legacy:
+		req = p9_client_rpc(c, P9_TVERSION, "ds",
+					c->msize, "9P2000");
+		break;
+	default:
+		return -EINVAL;
+		break;
+	}
+
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version);
+	err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
 	if (err) {
 		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err);
 		p9pdu_dump(1, req->rc);
@@ -642,10 +700,12 @@
 	}
 
 	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);
-	if (!memcmp(version, "9P2000.u", 8))
-		c->dotu = 1;
-	else if (!memcmp(version, "9P2000", 6))
-		c->dotu = 0;
+	if (!strncmp(version, "9P2010.L", 8))
+		c->proto_version = p9_proto_2010L;
+	else if (!strncmp(version, "9P2000.u", 8))
+		c->proto_version = p9_proto_2000u;
+	else if (!strncmp(version, "9P2000", 6))
+		c->proto_version = p9_proto_legacy;
 	else {
 		err = -EREMOTEIO;
 		goto error;
@@ -700,8 +760,8 @@
 		goto put_trans;
 	}
 
-	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n",
-		clnt, clnt->trans_mod, clnt->msize, clnt->dotu);
+	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
+		clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
 
 	err = clnt->trans_mod->create(clnt, dev_name, options);
 	if (err)
@@ -784,7 +844,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
@@ -833,7 +893,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
@@ -891,7 +951,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
@@ -952,7 +1012,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		goto free_and_error;
@@ -997,7 +1057,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		goto free_and_error;
@@ -1098,7 +1158,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		goto free_and_error;
@@ -1159,7 +1219,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "d", &count);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		goto free_and_error;
@@ -1199,7 +1259,7 @@
 		goto error;
 	}
 
-	err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret);
+	err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
 	if (err) {
 		p9pdu_dump(1, req->rc);
 		p9_free_req(clnt, req);
@@ -1226,7 +1286,7 @@
 }
 EXPORT_SYMBOL(p9_client_stat);
 
-static int p9_client_statsize(struct p9_wstat *wst, int optional)
+static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 {
 	int ret;
 
@@ -1245,7 +1305,7 @@
 	if (wst->muid)
 		ret += strlen(wst->muid);
 
-	if (optional) {
+	if (proto_version == p9_proto_2000u) {
 		ret += 2+4+4+4;	/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
 		if (wst->extension)
 			ret += strlen(wst->extension);
@@ -1262,7 +1322,7 @@
 
 	err = 0;
 	clnt = fid->clnt;
-	wst->size = p9_client_statsize(wst, clnt->dotu);
+	wst->size = p9_client_statsize(wst, clnt->proto_version);
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
 	P9_DPRINTK(P9_DEBUG_9P,
 		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index fc70147..94f5a8f 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -52,7 +52,7 @@
 #endif
 
 static int
-p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 
 #ifdef CONFIG_NET_9P_DEBUG
 void
@@ -144,7 +144,8 @@
 */
 
 static int
-p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
+	va_list ap)
 {
 	const char *ptr;
 	int errcode = 0;
@@ -194,7 +195,8 @@
 				int16_t len;
 				int size;
 
-				errcode = p9pdu_readf(pdu, optional, "w", &len);
+				errcode = p9pdu_readf(pdu, proto_version,
+								"w", &len);
 				if (errcode)
 					break;
 
@@ -217,7 +219,7 @@
 				struct p9_qid *qid =
 				    va_arg(ap, struct p9_qid *);
 
-				errcode = p9pdu_readf(pdu, optional, "bdq",
+				errcode = p9pdu_readf(pdu, proto_version, "bdq",
 						      &qid->type, &qid->version,
 						      &qid->path);
 			}
@@ -230,7 +232,7 @@
 				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
 									-1;
 				errcode =
-				    p9pdu_readf(pdu, optional,
+				    p9pdu_readf(pdu, proto_version,
 						"wwdQdddqssss?sddd",
 						&stbuf->size, &stbuf->type,
 						&stbuf->dev, &stbuf->qid,
@@ -250,7 +252,7 @@
 				void **data = va_arg(ap, void **);
 
 				errcode =
-				    p9pdu_readf(pdu, optional, "d", count);
+				    p9pdu_readf(pdu, proto_version, "d", count);
 				if (!errcode) {
 					*count =
 					    MIN(*count,
@@ -263,8 +265,8 @@
 				int16_t *nwname = va_arg(ap, int16_t *);
 				char ***wnames = va_arg(ap, char ***);
 
-				errcode =
-				    p9pdu_readf(pdu, optional, "w", nwname);
+				errcode = p9pdu_readf(pdu, proto_version,
+								"w", nwname);
 				if (!errcode) {
 					*wnames =
 					    kmalloc(sizeof(char *) * *nwname,
@@ -278,7 +280,8 @@
 
 					for (i = 0; i < *nwname; i++) {
 						errcode =
-						    p9pdu_readf(pdu, optional,
+						    p9pdu_readf(pdu,
+								proto_version,
 								"s",
 								&(*wnames)[i]);
 						if (errcode)
@@ -306,7 +309,7 @@
 				*wqids = NULL;
 
 				errcode =
-				    p9pdu_readf(pdu, optional, "w", nwqid);
+				    p9pdu_readf(pdu, proto_version, "w", nwqid);
 				if (!errcode) {
 					*wqids =
 					    kmalloc(*nwqid *
@@ -321,7 +324,8 @@
 
 					for (i = 0; i < *nwqid; i++) {
 						errcode =
-						    p9pdu_readf(pdu, optional,
+						    p9pdu_readf(pdu,
+								proto_version,
 								"Q",
 								&(*wqids)[i]);
 						if (errcode)
@@ -336,7 +340,7 @@
 			}
 			break;
 		case '?':
-			if (!optional)
+			if (proto_version != p9_proto_2000u)
 				return 0;
 			break;
 		default:
@@ -352,7 +356,8 @@
 }
 
 int
-p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
+p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
+	va_list ap)
 {
 	const char *ptr;
 	int errcode = 0;
@@ -389,7 +394,8 @@
 				if (sptr)
 					len = MIN(strlen(sptr), USHORT_MAX);
 
-				errcode = p9pdu_writef(pdu, optional, "w", len);
+				errcode = p9pdu_writef(pdu, proto_version,
+								"w", len);
 				if (!errcode && pdu_write(pdu, sptr, len))
 					errcode = -EFAULT;
 			}
@@ -398,7 +404,7 @@
 				const struct p9_qid *qid =
 				    va_arg(ap, const struct p9_qid *);
 				errcode =
-				    p9pdu_writef(pdu, optional, "bdq",
+				    p9pdu_writef(pdu, proto_version, "bdq",
 						 qid->type, qid->version,
 						 qid->path);
 			} break;
@@ -406,7 +412,7 @@
 				const struct p9_wstat *stbuf =
 				    va_arg(ap, const struct p9_wstat *);
 				errcode =
-				    p9pdu_writef(pdu, optional,
+				    p9pdu_writef(pdu, proto_version,
 						 "wwdQdddqssss?sddd",
 						 stbuf->size, stbuf->type,
 						 stbuf->dev, &stbuf->qid,
@@ -421,8 +427,8 @@
 				int32_t count = va_arg(ap, int32_t);
 				const void *data = va_arg(ap, const void *);
 
-				errcode =
-				    p9pdu_writef(pdu, optional, "d", count);
+				errcode = p9pdu_writef(pdu, proto_version, "d",
+									count);
 				if (!errcode && pdu_write(pdu, data, count))
 					errcode = -EFAULT;
 			}
@@ -431,8 +437,8 @@
 				int32_t count = va_arg(ap, int32_t);
 				const char __user *udata =
 						va_arg(ap, const void __user *);
-				errcode =
-				    p9pdu_writef(pdu, optional, "d", count);
+				errcode = p9pdu_writef(pdu, proto_version, "d",
+									count);
 				if (!errcode && pdu_write_u(pdu, udata, count))
 					errcode = -EFAULT;
 			}
@@ -441,14 +447,15 @@
 				int16_t nwname = va_arg(ap, int);
 				const char **wnames = va_arg(ap, const char **);
 
-				errcode =
-				    p9pdu_writef(pdu, optional, "w", nwname);
+				errcode = p9pdu_writef(pdu, proto_version, "w",
+									nwname);
 				if (!errcode) {
 					int i;
 
 					for (i = 0; i < nwname; i++) {
 						errcode =
-						    p9pdu_writef(pdu, optional,
+						    p9pdu_writef(pdu,
+								proto_version,
 								 "s",
 								 wnames[i]);
 						if (errcode)
@@ -462,14 +469,15 @@
 				struct p9_qid *wqids =
 				    va_arg(ap, struct p9_qid *);
 
-				errcode =
-				    p9pdu_writef(pdu, optional, "w", nwqid);
+				errcode = p9pdu_writef(pdu, proto_version, "w",
+									nwqid);
 				if (!errcode) {
 					int i;
 
 					for (i = 0; i < nwqid; i++) {
 						errcode =
-						    p9pdu_writef(pdu, optional,
+						    p9pdu_writef(pdu,
+								proto_version,
 								 "Q",
 								 &wqids[i]);
 						if (errcode)
@@ -479,7 +487,7 @@
 			}
 			break;
 		case '?':
-			if (!optional)
+			if (proto_version != p9_proto_2000u)
 				return 0;
 			break;
 		default:
@@ -494,32 +502,32 @@
 	return errcode;
 }
 
-int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
 	va_start(ap, fmt);
-	ret = p9pdu_vreadf(pdu, optional, fmt, ap);
+	ret = p9pdu_vreadf(pdu, proto_version, fmt, ap);
 	va_end(ap);
 
 	return ret;
 }
 
 static int
-p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...)
+p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
 	va_start(ap, fmt);
-	ret = p9pdu_vwritef(pdu, optional, fmt, ap);
+	ret = p9pdu_vwritef(pdu, proto_version, fmt, ap);
 	va_end(ap);
 
 	return ret;
 }
 
-int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu)
+int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
 {
 	struct p9_fcall fake_pdu;
 	int ret;
@@ -529,7 +537,7 @@
 	fake_pdu.sdata = buf;
 	fake_pdu.offset = 0;
 
-	ret = p9pdu_readf(&fake_pdu, dotu, "S", st);
+	ret = p9pdu_readf(&fake_pdu, proto_version, "S", st);
 	if (ret) {
 		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
 		p9pdu_dump(1, &fake_pdu);
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index ccde462..2431c0f 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -25,9 +25,9 @@
  *
  */
 
-int
-p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap);
-int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
+								va_list ap);
+int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
 int p9pdu_finalize(struct p9_fcall *pdu);
 void p9pdu_dump(int, struct p9_fcall *);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cb50f4a..0aaed48 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -49,8 +49,6 @@
 
 /* a single mutex to manage channel initialization and attachment */
 static DEFINE_MUTEX(virtio_9p_lock);
-/* global which tracks highest initialized channel */
-static int chan_index;
 
 /**
  * struct virtio_chan - per-instance transport information
@@ -68,8 +66,7 @@
  *
  */
 
-static struct virtio_chan {
-	bool initialized;
+struct virtio_chan {
 	bool inuse;
 
 	spinlock_t lock;
@@ -80,7 +77,11 @@
 
 	/* Scatterlist: can be too big for stack. */
 	struct scatterlist sg[VIRTQUEUE_NUM];
-} channels[MAX_9P_CHAN];
+
+	struct list_head chan_list;
+};
+
+static struct list_head virtio_chan_list;
 
 /* How many bytes left in this page. */
 static unsigned int rest_of_page(void *data)
@@ -217,9 +218,7 @@
  * p9_virtio_probe - probe for existence of 9P virtio channels
  * @vdev: virtio device to probe
  *
- * This probes for existing virtio channels.  At present only
- * a single channel is in use, so in the future more work may need
- * to be done here.
+ * This probes for existing virtio channels.
  *
  */
 
@@ -227,16 +226,10 @@
 {
 	int err;
 	struct virtio_chan *chan;
-	int index;
 
-	mutex_lock(&virtio_9p_lock);
-	index = chan_index++;
-	chan = &channels[index];
-	mutex_unlock(&virtio_9p_lock);
-
-	if (chan_index > MAX_9P_CHAN) {
-		printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n");
-		BUG();
+	chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
+	if (!chan) {
+		printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -255,15 +248,15 @@
 	sg_init_table(chan->sg, VIRTQUEUE_NUM);
 
 	chan->inuse = false;
-	chan->initialized = true;
+	mutex_lock(&virtio_9p_lock);
+	list_add_tail(&chan->chan_list, &virtio_chan_list);
+	mutex_unlock(&virtio_9p_lock);
 	return 0;
 
 out_free_vq:
 	vdev->config->del_vqs(vdev);
+	kfree(chan);
 fail:
-	mutex_lock(&virtio_9p_lock);
-	chan_index--;
-	mutex_unlock(&virtio_9p_lock);
 	return err;
 }
 
@@ -280,35 +273,31 @@
  * We use a simple reference count mechanism to ensure that only a single
  * mount has a channel open at a time.
  *
- * Bugs: doesn't allow identification of a specific channel
- * to allocate, channels are allocated sequentially. This was
- * a pragmatic decision to get things rolling, but ideally some
- * way of identifying the channel to attach to would be nice
- * if we are going to support multiple channels.
- *
  */
 
 static int
 p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 {
-	struct virtio_chan *chan = channels;
-	int index = 0;
+	struct virtio_chan *chan;
+	int ret = -ENOENT;
+	int found = 0;
 
 	mutex_lock(&virtio_9p_lock);
-	while (index < MAX_9P_CHAN) {
-		if (chan->initialized && !chan->inuse) {
-			chan->inuse = true;
-			break;
-		} else {
-			index++;
-			chan = &channels[index];
+	list_for_each_entry(chan, &virtio_chan_list, chan_list) {
+		if (!strcmp(devname, dev_name(&chan->vdev->dev))) {
+			if (!chan->inuse) {
+				chan->inuse = true;
+				found = 1;
+				break;
+			}
+			ret = -EBUSY;
 		}
 	}
 	mutex_unlock(&virtio_9p_lock);
 
-	if (index >= MAX_9P_CHAN) {
+	if (!found) {
 		printk(KERN_ERR "9p: no channels available\n");
-		return -ENODEV;
+		return ret;
 	}
 
 	client->trans = (void *)chan;
@@ -329,11 +318,13 @@
 	struct virtio_chan *chan = vdev->priv;
 
 	BUG_ON(chan->inuse);
+	vdev->config->del_vqs(vdev);
 
-	if (chan->initialized) {
-		vdev->config->del_vqs(vdev);
-		chan->initialized = false;
-	}
+	mutex_lock(&virtio_9p_lock);
+	list_del(&chan->chan_list);
+	mutex_unlock(&virtio_9p_lock);
+	kfree(chan);
+
 }
 
 static struct virtio_device_id id_table[] = {
@@ -364,10 +355,7 @@
 /* The standard init function */
 static int __init p9_virtio_init(void)
 {
-	int count;
-
-	for (count = 0; count < MAX_9P_CHAN; count++)
-		channels[count].initialized = false;
+	INIT_LIST_HEAD(&virtio_chan_list);
 
 	v9fs_register_trans(&p9_virtio_trans);
 	return register_virtio_driver(&p9_virtio_drv);
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 6dcdd25..f845d9d 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -71,8 +71,9 @@
 	if (unlikely(len == 0))
 		return len;
 
-	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
-	    !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL))
+	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+		return len;
+	if (sin6->sin6_scope_id == 0)
 		return len;
 
 	rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
@@ -165,8 +166,7 @@
 	if (*delim != IPV6_SCOPE_DELIMITER)
 		return 0;
 
-	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
-	    !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL))
+	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
 		return 0;
 
 	len = (buf + buflen) - delim - 1;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index f7a7f83..0cfccc2a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -206,8 +206,14 @@
 	ctx->gc_win = window_size;
 	/* gssd signals an error by passing ctx->gc_win = 0: */
 	if (ctx->gc_win == 0) {
-		/* in which case, p points to  an error code which we ignore */
-		p = ERR_PTR(-EACCES);
+		/*
+		 * in which case, p points to an error code. Anything other
+		 * than -EKEYEXPIRED gets converted to -EACCES.
+		 */
+		p = simple_get_bytes(p, end, &ret, sizeof(ret));
+		if (!IS_ERR(p))
+			p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) :
+						    ERR_PTR(-EACCES);
 		goto err;
 	}
 	/* copy the opaque wire context */
@@ -646,6 +652,7 @@
 		err = PTR_ERR(p);
 		switch (err) {
 		case -EACCES:
+		case -EKEYEXPIRED:
 			gss_msg->msg.errno = err;
 			err = mlen;
 			break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 9ea4538..8d63f8f 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -999,19 +999,14 @@
 	inode = rpc_get_inode(sb, S_IFDIR | 0755);
 	if (!inode)
 		return -ENOMEM;
-	root = d_alloc_root(inode);
+	sb->s_root = root = d_alloc_root(inode);
 	if (!root) {
 		iput(inode);
 		return -ENOMEM;
 	}
 	if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
-		goto out;
-	sb->s_root = root;
+		return -ENOMEM;
 	return 0;
-out:
-	d_genocide(root);
-	dput(root);
-	return -ENOMEM;
 }
 
 static int
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 538ca43..6dcf8c9 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -506,6 +506,10 @@
 {
 	unsigned int pages, arghi;
 
+	/* bc_xprt uses fore channel allocated buffers */
+	if (svc_is_backchannel(rqstp))
+		return 1;
+
 	pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
 				       * We assume one is at most one page
 				       */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d739e5..7124129 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1912,6 +1912,11 @@
 	case -EALREADY:
 		xprt_clear_connecting(xprt);
 		return;
+	case -EINVAL:
+		/* Happens, for instance, if the user specified a link
+		 * local IPv6 address without a scope-id.
+		 */
+		goto out;
 	}
 out_eagain:
 	status = -EAGAIN;
@@ -2100,7 +2105,7 @@
  * we allocate pages instead doing a kmalloc like rpc_malloc is because we want
  * to use the server side send routines.
  */
-void *bc_malloc(struct rpc_task *task, size_t size)
+static void *bc_malloc(struct rpc_task *task, size_t size)
 {
 	struct page *page;
 	struct rpc_buffer *buf;
@@ -2120,7 +2125,7 @@
 /*
  * Free the space allocated in the bc_alloc routine
  */
-void bc_free(void *buffer)
+static void bc_free(void *buffer)
 {
 	struct rpc_buffer *buf;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index a5721b3..5225e66 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -387,7 +387,7 @@
 	struct smk_audit_info ad;
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS);
-	smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint);
+	smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_root);
 	smk_ad_setfield_u_fs_path_mnt(&ad, mnt);
 
 	sbp = mnt->mnt_sb->s_security;
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index c00df45..cf7d61f 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -88,29 +88,14 @@
 		sp = dentry->d_op->d_dname(dentry, newname + offset,
 					   newname_len - offset);
 	} else {
-		/* Taken from d_namespace_path(). */
-		struct path root;
-		struct path ns_root = { };
-		struct path tmp;
+		struct path ns_root = {.mnt = NULL, .dentry = NULL};
 
-		read_lock(&current->fs->lock);
-		root = current->fs->root;
-		path_get(&root);
-		read_unlock(&current->fs->lock);
-		spin_lock(&vfsmount_lock);
-		if (root.mnt && root.mnt->mnt_ns)
-			ns_root.mnt = mntget(root.mnt->mnt_ns->root);
-		if (ns_root.mnt)
-			ns_root.dentry = dget(ns_root.mnt->mnt_root);
-		spin_unlock(&vfsmount_lock);
 		spin_lock(&dcache_lock);
-		tmp = ns_root;
-		sp = __d_path(path, &tmp, newname, newname_len);
+		/* go to whatever namespace root we are under */
+		sp = __d_path(path, &ns_root, newname, newname_len);
 		spin_unlock(&dcache_lock);
-		path_put(&root);
-		path_put(&ns_root);
 		/* Prepend "/proc" prefix if using internal proc vfs mount. */
-		if (!IS_ERR(sp) && (path->mnt->mnt_parent == path->mnt) &&
+		if (!IS_ERR(sp) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
 		    (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
 			sp -= 5;
 			if (sp >= newname)
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 2de3407..34202b1 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -41,7 +41,8 @@
 
 -d::
 --del=::
-	Delete a probe event.
+	Delete probe events. This accepts glob wildcards('*', '?') and character
+	classes(e.g. [a-z], [!A-Z]).
 
 -l::
 --list::
@@ -50,17 +51,29 @@
 -L::
 --line=::
 	Show source code lines which can be probed. This needs an argument
-	which specifies a range of the source code.
+	which specifies a range of the source code. (see LINE SYNTAX for detail)
+
+-f::
+--force::
+	Forcibly add events with existing name.
 
 PROBE SYNTAX
 ------------
 Probe points are defined by following syntax.
 
- "[EVENT=]FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]"
+    1) Define event based on function name
+     [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
+
+    2) Define event based on source file with line number
+     [EVENT=]SRC:ALN [ARG ...]
+
+    3) Define event based on source file with lazy pattern
+     [EVENT=]SRC;PTN [ARG ...]
+
 
 'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'.
-'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function.
-It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
+'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition.  In addition, '@SRC' specifies a source file which has that function.
+It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
 'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
 
 LINE SYNTAX
@@ -76,6 +89,41 @@
 many lines to show by using 'NUM'.
 So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
 
+LAZY MATCHING
+-------------
+ The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
+
+e.g.
+ 'a=*' can matches 'a=b', 'a = b', 'a == b' and so on.
+
+This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
+
+
+EXAMPLES
+--------
+Display which lines in schedule() can be probed:
+
+ ./perf probe --line schedule
+
+Add a probe on schedule() function 12th line with recording cpu local variable:
+
+ ./perf probe schedule:12 cpu
+ or
+ ./perf probe --add='schedule:12 cpu'
+
+ this will add one or more probes which has the name start with "schedule".
+
+ Add probes on lines in schedule() function which calls update_rq_clock().
+
+ ./perf probe 'schedule;update_rq_clock*'
+ or
+ ./perf probe --add='schedule;update_rq_clock*'
+
+Delete all probes on schedule().
+
+ ./perf probe --del='schedule*'
+
+
 SEE ALSO
 --------
 linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 54a5b50..2d53738 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -500,12 +500,12 @@
 	msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
 endif
 
-ifneq ($(shell sh -c "(echo '\#ifndef _MIPS_SZLONG'; echo '\#define _MIPS_SZLONG 0'; echo '\#endif'; echo '\#include <dwarf.h>'; echo '\#include <libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/libdwarf -ldwarf -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
-	msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
-	BASIC_CFLAGS += -DNO_LIBDWARF
+ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+	msg := $(warning No libdw.h found or old libdw.h found, disables dwarf support. Please install elfutils-devel/elfutils-dev);
+	BASIC_CFLAGS += -DNO_DWARF_SUPPORT
 else
-	BASIC_CFLAGS += -I/usr/include/libdwarf
-	EXTLIBS += -lelf -ldwarf
+	BASIC_CFLAGS += -I/usr/include/elfutils
+	EXTLIBS += -lelf -ldw
 	LIB_OBJS += util/probe-finder.o
 endif
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index ad47bd4..c30a335 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -128,7 +128,7 @@
 		    pp->function);
 }
 
-#ifndef NO_LIBDWARF
+#ifndef NO_DWARF_SUPPORT
 static int open_vmlinux(void)
 {
 	if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) {
@@ -156,14 +156,16 @@
 	"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
 	"perf probe [<options>] --del '[GROUP:]EVENT' ...",
 	"perf probe --list",
+#ifndef NO_DWARF_SUPPORT
 	"perf probe --line 'LINEDESC'",
+#endif
 	NULL
 };
 
 static const struct option options[] = {
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show parsed arguments, etc)"),
-#ifndef NO_LIBDWARF
+#ifndef NO_DWARF_SUPPORT
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 #endif
@@ -172,30 +174,32 @@
 	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
 		opt_del_probe_event),
 	OPT_CALLBACK('a', "add", NULL,
-#ifdef NO_LIBDWARF
-		"[EVENT=]FUNC[+OFFS|%return] [ARG ...]",
+#ifdef NO_DWARF_SUPPORT
+		"[EVENT=]FUNC[+OFF|%return] [ARG ...]",
 #else
-		"[EVENT=]FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]",
+		"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
+		" [ARG ...]",
 #endif
 		"probe point definition, where\n"
 		"\t\tGROUP:\tGroup name (optional)\n"
 		"\t\tEVENT:\tEvent name\n"
 		"\t\tFUNC:\tFunction name\n"
-		"\t\tOFFS:\tOffset from function entry (in byte)\n"
+		"\t\tOFF:\tOffset from function entry (in byte)\n"
 		"\t\t%return:\tPut the probe at function return\n"
-#ifdef NO_LIBDWARF
+#ifdef NO_DWARF_SUPPORT
 		"\t\tARG:\tProbe argument (only \n"
 #else
 		"\t\tSRC:\tSource code path\n"
-		"\t\tRLN:\tRelative line number from function entry.\n"
-		"\t\tALN:\tAbsolute line number in file.\n"
+		"\t\tRL:\tRelative line number from function entry.\n"
+		"\t\tAL:\tAbsolute line number in file.\n"
+		"\t\tPT:\tLazy expression of line code.\n"
 		"\t\tARG:\tProbe argument (local variable name or\n"
 #endif
 		"\t\t\tkprobe-tracer argument format.)\n",
 		opt_add_probe_event),
 	OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
 		    " with existing name"),
-#ifndef NO_LIBDWARF
+#ifndef NO_DWARF_SUPPORT
 	OPT_CALLBACK('L', "line", NULL,
 		     "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
 		     "Show source code lines.", opt_show_lines),
@@ -223,7 +227,7 @@
 int cmd_probe(int argc, const char **argv, const char *prefix __used)
 {
 	int i, ret;
-#ifndef NO_LIBDWARF
+#ifndef NO_DWARF_SUPPORT
 	int fd;
 #endif
 	struct probe_point *pp;
@@ -259,7 +263,7 @@
 		return 0;
 	}
 
-#ifndef NO_LIBDWARF
+#ifndef NO_DWARF_SUPPORT
 	if (session.show_lines) {
 		if (session.nr_probe != 0 || session.dellist) {
 			pr_warning("  Error: Don't use --line with"
@@ -290,9 +294,9 @@
 	init_vmlinux();
 
 	if (session.need_dwarf)
-#ifdef NO_LIBDWARF
+#ifdef NO_DWARF_SUPPORT
 		die("Debuginfo-analysis is not supported");
-#else	/* !NO_LIBDWARF */
+#else	/* !NO_DWARF_SUPPORT */
 		pr_debug("Some probes require debuginfo.\n");
 
 	fd = open_vmlinux();
@@ -312,7 +316,7 @@
 			continue;
 
 		lseek(fd, SEEK_SET, 0);
-		ret = find_probepoint(fd, pp);
+		ret = find_probe_point(fd, pp);
 		if (ret > 0)
 			continue;
 		if (ret == 0) {	/* No error but failed to find probe point. */
@@ -333,7 +337,7 @@
 	close(fd);
 
 end_dwarf:
-#endif /* !NO_LIBDWARF */
+#endif /* !NO_DWARF_SUPPORT */
 
 	/* Synthesize probes without dwarf */
 	for (i = 0; i < session.nr_probe; i++) {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8f05688..c971e81 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -119,14 +119,14 @@
 	char c, nc = 0;
 	/*
 	 * <Syntax>
-	 * perf probe [EVENT=]SRC:LN
-	 * perf probe [EVENT=]FUNC[+OFFS|%return][@SRC]
+	 * perf probe [EVENT=]SRC[:LN|;PTN]
+	 * perf probe [EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT]
 	 *
 	 * TODO:Group name support
 	 */
 
-	ptr = strchr(arg, '=');
-	if (ptr) {	/* Event name */
+	ptr = strpbrk(arg, ";=@+%");
+	if (ptr && *ptr == '=') {	/* Event name */
 		*ptr = '\0';
 		tmp = ptr + 1;
 		ptr = strchr(arg, ':');
@@ -139,7 +139,7 @@
 		arg = tmp;
 	}
 
-	ptr = strpbrk(arg, ":+@%");
+	ptr = strpbrk(arg, ";:+@%");
 	if (ptr) {
 		nc = *ptr;
 		*ptr++ = '\0';
@@ -156,7 +156,11 @@
 	while (ptr) {
 		arg = ptr;
 		c = nc;
-		ptr = strpbrk(arg, ":+@%");
+		if (c == ';') {	/* Lazy pattern must be the last part */
+			pp->lazy_line = strdup(arg);
+			break;
+		}
+		ptr = strpbrk(arg, ";:+@%");
 		if (ptr) {
 			nc = *ptr;
 			*ptr++ = '\0';
@@ -165,13 +169,13 @@
 		case ':':	/* Line number */
 			pp->line = strtoul(arg, &tmp, 0);
 			if (*tmp != '\0')
-				semantic_error("There is non-digit charactor"
-						" in line number.");
+				semantic_error("There is non-digit char"
+					       " in line number.");
 			break;
 		case '+':	/* Byte offset from a symbol */
 			pp->offset = strtoul(arg, &tmp, 0);
 			if (*tmp != '\0')
-				semantic_error("There is non-digit charactor"
+				semantic_error("There is non-digit character"
 						" in offset.");
 			break;
 		case '@':	/* File name */
@@ -179,9 +183,6 @@
 				semantic_error("SRC@SRC is not allowed.");
 			pp->file = strdup(arg);
 			DIE_IF(pp->file == NULL);
-			if (ptr)
-				semantic_error("@SRC must be the last "
-					       "option.");
 			break;
 		case '%':	/* Probe places */
 			if (strcmp(arg, "return") == 0) {
@@ -196,11 +197,18 @@
 	}
 
 	/* Exclusion check */
+	if (pp->lazy_line && pp->line)
+		semantic_error("Lazy pattern can't be used with line number.");
+
+	if (pp->lazy_line && pp->offset)
+		semantic_error("Lazy pattern can't be used with offset.");
+
 	if (pp->line && pp->offset)
 		semantic_error("Offset can't be used with line number.");
 
-	if (!pp->line && pp->file && !pp->function)
-		semantic_error("File always requires line number.");
+	if (!pp->line && !pp->lazy_line && pp->file && !pp->function)
+		semantic_error("File always requires line number or "
+			       "lazy pattern.");
 
 	if (pp->offset && !pp->function)
 		semantic_error("Offset requires an entry function.");
@@ -208,11 +216,13 @@
 	if (pp->retprobe && !pp->function)
 		semantic_error("Return probe requires an entry function.");
 
-	if ((pp->offset || pp->line) && pp->retprobe)
-		semantic_error("Offset/Line can't be used with return probe.");
+	if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe)
+		semantic_error("Offset/Line/Lazy pattern can't be used with "
+			       "return probe.");
 
-	pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n",
-		 pp->function, pp->file, pp->line, pp->offset, pp->retprobe);
+	pr_debug("symbol:%s file:%s line:%d offset:%d return:%d lazy:%s\n",
+		 pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
+		 pp->lazy_line);
 }
 
 /* Parse perf-probe event definition */
@@ -458,6 +468,8 @@
 		free(pp->function);
 	if (pp->file)
 		free(pp->file);
+	if (pp->lazy_line)
+		free(pp->lazy_line);
 	for (i = 0; i < pp->nr_args; i++)
 		free(pp->args[i]);
 	if (pp->args)
@@ -719,6 +731,7 @@
 }
 
 #define LINEBUF_SIZE 256
+#define NR_ADDITIONAL_LINES 2
 
 static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
 {
@@ -779,5 +792,11 @@
 			show_one_line(fp, (l++) - lr->offset, false, false);
 		show_one_line(fp, (l++) - lr->offset, false, true);
 	}
+
+	if (lr->end == INT_MAX)
+		lr->end = l + NR_ADDITIONAL_LINES;
+	while (l < lr->end && !feof(fp))
+		show_one_line(fp, (l++) - lr->offset, false, false);
+
 	fclose(fp);
 }
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1b2124d..e77dc88 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -32,21 +32,13 @@
 #include <stdarg.h>
 #include <ctype.h>
 
+#include "string.h"
 #include "event.h"
 #include "debug.h"
 #include "util.h"
 #include "probe-finder.h"
 
 
-/* Dwarf_Die Linkage to parent Die */
-struct die_link {
-	struct die_link *parent;	/* Parent die */
-	Dwarf_Die die;			/* Current die */
-};
-
-static Dwarf_Debug __dw_debug;
-static Dwarf_Error __dw_error;
-
 /*
  * Generic dwarf analysis helpers
  */
@@ -113,281 +105,190 @@
 	return 0;
 }
 
-/* Find the fileno of the target file. */
-static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
+/* Line number list operations */
+
+/* Add a line to line number list */
+static void line_list__add_line(struct list_head *head, unsigned int line)
 {
-	Dwarf_Signed cnt, i;
-	Dwarf_Unsigned found = 0;
-	char **srcs;
+	struct line_node *ln;
+	struct list_head *p;
+
+	/* Reverse search, because new line will be the last one */
+	list_for_each_entry_reverse(ln, head, list) {
+		if (ln->line < line) {
+			p = &ln->list;
+			goto found;
+		} else if (ln->line == line)	/* Already exist */
+			return ;
+	}
+	/* List is empty, or the smallest entry */
+	p = head;
+found:
+	pr_debug("line list: add a line %u\n", line);
+	ln = zalloc(sizeof(struct line_node));
+	DIE_IF(ln == NULL);
+	ln->line = line;
+	INIT_LIST_HEAD(&ln->list);
+	list_add(&ln->list, p);
+}
+
+/* Check if the line in line number list */
+static int line_list__has_line(struct list_head *head, unsigned int line)
+{
+	struct line_node *ln;
+
+	/* Reverse search, because new line will be the last one */
+	list_for_each_entry(ln, head, list)
+		if (ln->line == line)
+			return 1;
+
+	return 0;
+}
+
+/* Init line number list */
+static void line_list__init(struct list_head *head)
+{
+	INIT_LIST_HEAD(head);
+}
+
+/* Free line number list */
+static void line_list__free(struct list_head *head)
+{
+	struct line_node *ln;
+	while (!list_empty(head)) {
+		ln = list_first_entry(head, struct line_node, list);
+		list_del(&ln->list);
+		free(ln);
+	}
+}
+
+/* Dwarf wrappers */
+
+/* Find the realpath of the target file. */
+static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+	Dwarf_Files *files;
+	size_t nfiles, i;
+	const char *src;
 	int ret;
 
 	if (!fname)
-		return 0;
+		return NULL;
 
-	ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
-	if (ret == DW_DLV_OK) {
-		for (i = 0; i < cnt && !found; i++) {
-			if (strtailcmp(srcs[i], fname) == 0)
-				found = i + 1;
-			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
-		}
-		for (; i < cnt; i++)
-			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
-		dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+	ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
+	if (ret != 0)
+		return NULL;
+
+	for (i = 0; i < nfiles; i++) {
+		src = dwarf_filesrc(files, i, NULL, NULL);
+		if (strtailcmp(src, fname) == 0)
+			break;
 	}
-	if (found)
-		pr_debug("found fno: %d\n", (int)found);
-	return found;
+	return src;
 }
 
-static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+struct __addr_die_search_param {
+	Dwarf_Addr	addr;
+	Dwarf_Die	*die_mem;
+};
+
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
 {
-	Dwarf_Signed cnt, i;
-	char **srcs;
-	int ret = 0;
+	struct __addr_die_search_param *ad = data;
 
-	if (!buf || !fno)
-		return -EINVAL;
+	if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+	    dwarf_haspc(fn_die, ad->addr)) {
+		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
 
-	ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
-	if (ret == DW_DLV_OK) {
-		if ((Dwarf_Unsigned)cnt > fno - 1) {
-			*buf = strdup(srcs[fno - 1]);
-			ret = 0;
-			pr_debug("found filename: %s\n", *buf);
-		} else
-			ret = -ENOENT;
-		for (i = 0; i < cnt; i++)
-			dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
-		dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
-	} else
-		ret = -EINVAL;
-	return ret;
+/* Search a real subprogram including this line, */
+static Dwarf_Die *die_get_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr,
+					  Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param ad;
+	ad.addr = addr;
+	ad.die_mem = die_mem;
+	/* dwarf_getscopes can't find subprogram. */
+	if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
+		return NULL;
+	else
+		return die_mem;
+}
+
+/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */
+static Dwarf_Die *die_get_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+				     Dwarf_Die *die_mem)
+{
+	Dwarf_Die child_die;
+	int ret;
+
+	ret = dwarf_child(sp_die, die_mem);
+	if (ret != 0)
+		return NULL;
+
+	do {
+		if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
+		    dwarf_haspc(die_mem, addr))
+			return die_mem;
+
+		if (die_get_inlinefunc(die_mem, addr, &child_die)) {
+			memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+			return die_mem;
+		}
+	} while (dwarf_siblingof(die_mem, die_mem) == 0);
+
+	return NULL;
 }
 
 /* Compare diename and tname */
-static int die_compare_name(Dwarf_Die dw_die, const char *tname)
+static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
 {
-	char *name;
-	int ret;
-	ret = dwarf_diename(dw_die, &name, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_OK) {
-		ret = strcmp(tname, name);
-		dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
-	} else
-		ret = -1;
-	return ret;
-}
-
-/* Check the address is in the subprogram(function). */
-static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr,
-				 Dwarf_Signed *offs)
-{
-	Dwarf_Addr lopc, hipc;
-	int ret;
-
-	/* TODO: check ranges */
-	ret = dwarf_lowpc(sp_die, &lopc, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_NO_ENTRY)
-		return 0;
-	ret = dwarf_highpc(sp_die, &hipc, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	if (lopc <= addr && addr < hipc) {
-		*offs = addr - lopc;
-		return 1;
-	} else
-		return 0;
-}
-
-/* Check the die is inlined function */
-static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die)
-{
-	/* TODO: check strictly */
-	Dwarf_Bool inl;
-	int ret;
-
-	ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	return inl;
-}
-
-/* Get the offset of abstruct_origin */
-static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die)
-{
-	Dwarf_Attribute attr;
-	Dwarf_Off cu_offs;
-	int ret;
-
-	ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	ret = dwarf_formref(attr, &cu_offs, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
-	return cu_offs;
+	const char *name;
+	name = dwarf_diename(dw_die);
+	DIE_IF(name == NULL);
+	return strcmp(tname, name);
 }
 
 /* Get entry pc(or low pc, 1st entry of ranges)  of the die */
-static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die)
+static Dwarf_Addr die_get_entrypc(Dwarf_Die *dw_die)
 {
-	Dwarf_Attribute attr;
-	Dwarf_Addr addr;
-	Dwarf_Off offs;
-	Dwarf_Ranges *ranges;
-	Dwarf_Signed cnt;
+	Dwarf_Addr epc;
 	int ret;
 
-	/* Try to get entry pc */
-	ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_OK) {
-		ret = dwarf_formaddr(attr, &addr, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
-		dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
-		return addr;
-	}
-
-	/* Try to get low pc */
-	ret = dwarf_lowpc(dw_die, &addr, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_OK)
-		return addr;
-
-	/* Try to get ranges */
-	ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	ret = dwarf_formref(attr, &offs, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL,
-				&__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	addr = ranges[0].dwr_addr1;
-	dwarf_ranges_dealloc(__dw_debug, ranges, cnt);
-	return addr;
+	ret = dwarf_entrypc(dw_die, &epc);
+	DIE_IF(ret == -1);
+	return epc;
 }
 
-/*
- * Search a Die from Die tree.
- * Note: cur_link->die should be deallocated in this function.
- */
-static int __search_die_tree(struct die_link *cur_link,
-			     int (*die_cb)(struct die_link *, void *),
-			     void *data)
+/* Get a variable die */
+static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name,
+				    Dwarf_Die *die_mem)
 {
-	Dwarf_Die new_die;
-	struct die_link new_link;
+	Dwarf_Die child_die;
+	int tag;
 	int ret;
 
-	if (!die_cb)
-		return 0;
+	ret = dwarf_child(sp_die, die_mem);
+	if (ret != 0)
+		return NULL;
 
-	/* Check current die */
-	while (!(ret = die_cb(cur_link, data))) {
-		/* Check child die */
-		ret = dwarf_child(cur_link->die, &new_die, &__dw_error);
-		DIE_IF(ret == DW_DLV_ERROR);
-		if (ret == DW_DLV_OK) {
-			new_link.parent = cur_link;
-			new_link.die = new_die;
-			ret = __search_die_tree(&new_link, die_cb, data);
-			if (ret)
-				break;
+	do {
+		tag = dwarf_tag(die_mem);
+		if ((tag == DW_TAG_formal_parameter ||
+		     tag == DW_TAG_variable) &&
+		    (die_compare_name(die_mem, name) == 0))
+			return die_mem;
+
+		if (die_find_variable(die_mem, name, &child_die)) {
+			memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+			return die_mem;
 		}
+	} while (dwarf_siblingof(die_mem, die_mem) == 0);
 
-		/* Move to next sibling */
-		ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die,
-				      &__dw_error);
-		DIE_IF(ret == DW_DLV_ERROR);
-		dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
-		cur_link->die = new_die;
-		if (ret == DW_DLV_NO_ENTRY)
-			return 0;
-	}
-	dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE);
-	return ret;
-}
-
-/* Search a die in its children's die tree */
-static int search_die_from_children(Dwarf_Die parent_die,
-				    int (*die_cb)(struct die_link *, void *),
-				    void *data)
-{
-	struct die_link new_link;
-	int ret;
-
-	new_link.parent = NULL;
-	ret = dwarf_child(parent_die, &new_link.die, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_OK)
-		return __search_die_tree(&new_link, die_cb, data);
-	else
-		return 0;
-}
-
-/* Find a locdesc corresponding to the address */
-static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc,
-			    Dwarf_Addr addr)
-{
-	Dwarf_Signed lcnt;
-	Dwarf_Locdesc **llbuf;
-	int ret, i;
-
-	ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	ret = DW_DLV_NO_ENTRY;
-	for (i = 0; i < lcnt; ++i) {
-		if (llbuf[i]->ld_lopc <= addr &&
-		    llbuf[i]->ld_hipc > addr) {
-			memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc));
-			desc->ld_s =
-				malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
-			DIE_IF(desc->ld_s == NULL);
-			memcpy(desc->ld_s, llbuf[i]->ld_s,
-				sizeof(Dwarf_Loc) * llbuf[i]->ld_cents);
-			ret = DW_DLV_OK;
-			break;
-		}
-		dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
-		dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
-	}
-	/* Releasing loop */
-	for (; i < lcnt; ++i) {
-		dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK);
-		dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC);
-	}
-	dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST);
-	return ret;
-}
-
-/* Get decl_file attribute value (file number) */
-static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die)
-{
-	Dwarf_Attribute attr;
-	Dwarf_Unsigned fno;
-	int ret;
-
-	ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_formudata(attr, &fno, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
-	return fno;
-}
-
-/* Get decl_line attribute value (line number) */
-static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die)
-{
-	Dwarf_Attribute attr;
-	Dwarf_Unsigned lno;
-	int ret;
-
-	ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_formudata(attr, &lno, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
-	return lno;
+	return NULL;
 }
 
 /*
@@ -395,47 +296,45 @@
  */
 
 /* Show a location */
-static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
+static void show_location(Dwarf_Op *op, struct probe_finder *pf)
 {
-	Dwarf_Small op;
-	Dwarf_Unsigned regn;
-	Dwarf_Signed offs;
+	unsigned int regn;
+	Dwarf_Word offs = 0;
 	int deref = 0, ret;
 	const char *regs;
 
-	op = loc->lr_atom;
-
+	/* TODO: support CFA */
 	/* If this is based on frame buffer, set the offset */
-	if (op == DW_OP_fbreg) {
+	if (op->atom == DW_OP_fbreg) {
+		if (pf->fb_ops == NULL)
+			die("The attribute of frame base is not supported.\n");
 		deref = 1;
-		offs = (Dwarf_Signed)loc->lr_number;
-		op = pf->fbloc.ld_s[0].lr_atom;
-		loc = &pf->fbloc.ld_s[0];
-	} else
-		offs = 0;
+		offs = op->number;
+		op = &pf->fb_ops[0];
+	}
 
-	if (op >= DW_OP_breg0 && op <= DW_OP_breg31) {
-		regn = op - DW_OP_breg0;
-		offs += (Dwarf_Signed)loc->lr_number;
+	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
+		regn = op->atom - DW_OP_breg0;
+		offs += op->number;
 		deref = 1;
-	} else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) {
-		regn = op - DW_OP_reg0;
-	} else if (op == DW_OP_bregx) {
-		regn = loc->lr_number;
-		offs += (Dwarf_Signed)loc->lr_number2;
+	} else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
+		regn = op->atom - DW_OP_reg0;
+	} else if (op->atom == DW_OP_bregx) {
+		regn = op->number;
+		offs += op->number2;
 		deref = 1;
-	} else if (op == DW_OP_regx) {
-		regn = loc->lr_number;
+	} else if (op->atom == DW_OP_regx) {
+		regn = op->number;
 	} else
-		die("Dwarf_OP %d is not supported.", op);
+		die("DW_OP %d is not supported.", op->atom);
 
 	regs = get_arch_regstr(regn);
 	if (!regs)
-		die("%lld exceeds max register number.", regn);
+		die("%u exceeds max register number.", regn);
 
 	if (deref)
-		ret = snprintf(pf->buf, pf->len,
-				 " %s=%+lld(%s)", pf->var, offs, regs);
+		ret = snprintf(pf->buf, pf->len, " %s=+%ju(%s)",
+			       pf->var, (uintmax_t)offs, regs);
 	else
 		ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs);
 	DIE_IF(ret < 0);
@@ -443,52 +342,37 @@
 }
 
 /* Show a variables in kprobe event format */
-static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
+static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 {
 	Dwarf_Attribute attr;
-	Dwarf_Locdesc ld;
+	Dwarf_Op *expr;
+	size_t nexpr;
 	int ret;
 
-	ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error);
-	if (ret != DW_DLV_OK)
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
 		goto error;
-	ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base));
-	if (ret != DW_DLV_OK)
+	/* TODO: handle more than 1 exprs */
+	ret = dwarf_getlocation_addr(&attr, (pf->addr - pf->cu_base),
+				     &expr, &nexpr, 1);
+	if (ret <= 0 || nexpr == 0)
 		goto error;
-	/* TODO? */
-	DIE_IF(ld.ld_cents != 1);
-	show_location(&ld.ld_s[0], pf);
-	free(ld.ld_s);
-	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
+
+	show_location(expr, pf);
+	/* *expr will be cached in libdw. Don't free it. */
 	return ;
 error:
+	/* TODO: Support const_value */
 	die("Failed to find the location of %s at this address.\n"
 	    " Perhaps, it has been optimized out.", pf->var);
 }
 
-static int variable_callback(struct die_link *dlink, void *data)
-{
-	struct probe_finder *pf = (struct probe_finder *)data;
-	Dwarf_Half tag;
-	int ret;
-
-	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if ((tag == DW_TAG_formal_parameter ||
-	     tag == DW_TAG_variable) &&
-	    (die_compare_name(dlink->die, pf->var) == 0)) {
-		show_variable(dlink->die, pf);
-		return 1;
-	}
-	/* TODO: Support struct members and arrays */
-	return 0;
-}
-
 /* Find a variable in a subprogram die */
-static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
+static void find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
 	int ret;
+	Dwarf_Die vr_die;
 
+	/* TODO: Support struct members and arrays */
 	if (!is_c_varname(pf->var)) {
 		/* Output raw parameters */
 		ret = snprintf(pf->buf, pf->len, " %s", pf->var);
@@ -499,58 +383,51 @@
 
 	pr_debug("Searching '%s' variable in context.\n", pf->var);
 	/* Search child die for local variables and parameters. */
-	ret = search_die_from_children(sp_die, variable_callback, pf);
-	if (!ret)
+	if (!die_find_variable(sp_die, pf->var, &vr_die))
 		die("Failed to find '%s' in this function.", pf->var);
-}
 
-/* Get a frame base on the address */
-static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf)
-{
-	Dwarf_Attribute attr;
-	int ret;
-
-	ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
-	ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base));
-	DIE_IF(ret != DW_DLV_OK);
-	dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR);
-}
-
-static void free_current_frame_base(struct probe_finder *pf)
-{
-	free(pf->fbloc.ld_s);
-	memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc));
+	show_variable(&vr_die, pf);
 }
 
 /* Show a probe point to output buffer */
-static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs,
-			    struct probe_finder *pf)
+static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
 {
 	struct probe_point *pp = pf->pp;
-	char *name;
+	Dwarf_Addr eaddr;
+	Dwarf_Die die_mem;
+	const char *name;
 	char tmp[MAX_PROBE_BUFFER];
 	int ret, i, len;
+	Dwarf_Attribute fb_attr;
+	size_t nops;
+
+	/* If no real subprogram, find a real one */
+	if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
+		sp_die = die_get_real_subprogram(&pf->cu_die,
+						 pf->addr, &die_mem);
+		if (!sp_die)
+			die("Probe point is not found in subprograms.");
+	}
 
 	/* Output name of probe point */
-	ret = dwarf_diename(sp_die, &name, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (ret == DW_DLV_OK) {
-		ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name,
-				(unsigned int)offs);
+	name = dwarf_diename(sp_die);
+	if (name) {
+		dwarf_entrypc(sp_die, &eaddr);
+		ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%lu", name,
+				(unsigned long)(pf->addr - eaddr));
 		/* Copy the function name if possible */
 		if (!pp->function) {
 			pp->function = strdup(name);
-			pp->offset = offs;
+			pp->offset = (size_t)(pf->addr - eaddr);
 		}
-		dwarf_dealloc(__dw_debug, name, DW_DLA_STRING);
 	} else {
 		/* This function has no name. */
-		ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr);
+		ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%jx",
+			       (uintmax_t)pf->addr);
 		if (!pp->function) {
 			/* TODO: Use _stext */
 			pp->function = strdup("");
-			pp->offset = (int)pf->addr;
+			pp->offset = (size_t)pf->addr;
 		}
 	}
 	DIE_IF(ret < 0);
@@ -558,8 +435,15 @@
 	len = ret;
 	pr_debug("Probe point found: %s\n", tmp);
 
+	/* Get the frame base attribute/ops */
+	dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
+	ret = dwarf_getlocation_addr(&fb_attr, (pf->addr - pf->cu_base),
+				     &pf->fb_ops, &nops, 1);
+	if (ret <= 0 || nops == 0)
+		pf->fb_ops = NULL;
+
 	/* Find each argument */
-	get_current_frame_base(sp_die, pf);
+	/* TODO: use dwarf_cfi_addrframe */
 	for (i = 0; i < pp->nr_args; i++) {
 		pf->var = pp->args[i];
 		pf->buf = &tmp[len];
@@ -567,289 +451,327 @@
 		find_variable(sp_die, pf);
 		len += strlen(pf->buf);
 	}
-	free_current_frame_base(pf);
+
+	/* *pf->fb_ops will be cached in libdw. Don't free it. */
+	pf->fb_ops = NULL;
 
 	pp->probes[pp->found] = strdup(tmp);
 	pp->found++;
 }
 
-static int probeaddr_callback(struct die_link *dlink, void *data)
-{
-	struct probe_finder *pf = (struct probe_finder *)data;
-	Dwarf_Half tag;
-	Dwarf_Signed offs;
-	int ret;
-
-	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	/* Check the address is in this subprogram */
-	if (tag == DW_TAG_subprogram &&
-	    die_within_subprogram(dlink->die, pf->addr, &offs)) {
-		show_probepoint(dlink->die, offs, pf);
-		return 1;
-	}
-	return 0;
-}
-
 /* Find probe point from its line number */
 static void find_probe_point_by_line(struct probe_finder *pf)
 {
-	Dwarf_Signed cnt, i, clm;
-	Dwarf_Line *lines;
-	Dwarf_Unsigned lineno = 0;
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	size_t nlines, i;
 	Dwarf_Addr addr;
-	Dwarf_Unsigned fno;
+	int lineno;
 	int ret;
 
-	ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
+	ret = dwarf_getsrclines(&pf->cu_die, &lines, &nlines);
+	DIE_IF(ret != 0);
 
-	for (i = 0; i < cnt; i++) {
-		ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
-		if (fno != pf->fno)
-			continue;
-
-		ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+		dwarf_lineno(line, &lineno);
 		if (lineno != pf->lno)
 			continue;
 
-		ret = dwarf_lineoff(lines[i], &clm, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
+		/* TODO: Get fileno from line, but how? */
+		if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
+			continue;
 
-		ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
-		pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n",
-			 (int)i, (unsigned)lineno, (int)clm, addr);
+		ret = dwarf_lineaddr(line, &addr);
+		DIE_IF(ret != 0);
+		pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
+			 (int)i, lineno, (uintmax_t)addr);
 		pf->addr = addr;
-		/* Search a real subprogram including this line, */
-		ret = search_die_from_children(pf->cu_die,
-					       probeaddr_callback, pf);
-		if (ret == 0)
-			die("Probe point is not found in subprograms.");
+
+		show_probe_point(NULL, pf);
 		/* Continuing, because target line might be inlined. */
 	}
-	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
 }
 
-/* Search function from function name */
-static int probefunc_callback(struct die_link *dlink, void *data)
+/* Find lines which match lazy pattern */
+static int find_lazy_match_lines(struct list_head *head,
+				 const char *fname, const char *pat)
+{
+	char *fbuf, *p1, *p2;
+	int fd, line, nlines = 0;
+	struct stat st;
+
+	fd = open(fname, O_RDONLY);
+	if (fd < 0)
+		die("failed to open %s", fname);
+	DIE_IF(fstat(fd, &st) < 0);
+	fbuf = malloc(st.st_size + 2);
+	DIE_IF(fbuf == NULL);
+	DIE_IF(read(fd, fbuf, st.st_size) < 0);
+	close(fd);
+	fbuf[st.st_size] = '\n';	/* Dummy line */
+	fbuf[st.st_size + 1] = '\0';
+	p1 = fbuf;
+	line = 1;
+	while ((p2 = strchr(p1, '\n')) != NULL) {
+		*p2 = '\0';
+		if (strlazymatch(p1, pat)) {
+			line_list__add_line(head, line);
+			nlines++;
+		}
+		line++;
+		p1 = p2 + 1;
+	}
+	free(fbuf);
+	return nlines;
+}
+
+/* Find probe points from lazy pattern  */
+static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	size_t nlines, i;
+	Dwarf_Addr addr;
+	Dwarf_Die die_mem;
+	int lineno;
+	int ret;
+
+	if (list_empty(&pf->lcache)) {
+		/* Matching lazy line pattern */
+		ret = find_lazy_match_lines(&pf->lcache, pf->fname,
+					    pf->pp->lazy_line);
+		if (ret <= 0)
+			die("No matched lines found in %s.", pf->fname);
+	}
+
+	ret = dwarf_getsrclines(&pf->cu_die, &lines, &nlines);
+	DIE_IF(ret != 0);
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+
+		dwarf_lineno(line, &lineno);
+		if (!line_list__has_line(&pf->lcache, lineno))
+			continue;
+
+		/* TODO: Get fileno from line, but how? */
+		if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
+			continue;
+
+		ret = dwarf_lineaddr(line, &addr);
+		DIE_IF(ret != 0);
+		if (sp_die) {
+			/* Address filtering 1: does sp_die include addr? */
+			if (!dwarf_haspc(sp_die, addr))
+				continue;
+			/* Address filtering 2: No child include addr? */
+			if (die_get_inlinefunc(sp_die, addr, &die_mem))
+				continue;
+		}
+
+		pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
+			 (int)i, lineno, (unsigned long long)addr);
+		pf->addr = addr;
+
+		show_probe_point(sp_die, pf);
+		/* Continuing, because target line might be inlined. */
+	}
+	/* TODO: deallocate lines, but how? */
+}
+
+static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
 {
 	struct probe_finder *pf = (struct probe_finder *)data;
 	struct probe_point *pp = pf->pp;
-	struct die_link *lk;
-	Dwarf_Signed offs;
-	Dwarf_Half tag;
-	int ret;
 
-	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (tag == DW_TAG_subprogram) {
-		if (die_compare_name(dlink->die, pp->function) == 0) {
-			if (pp->line) {	/* Function relative line */
-				pf->fno = die_get_decl_file(dlink->die);
-				pf->lno = die_get_decl_line(dlink->die)
-					 + pp->line;
-				find_probe_point_by_line(pf);
-				return 1;
-			}
-			if (die_inlined_subprogram(dlink->die)) {
-				/* Inlined function, save it. */
-				ret = dwarf_die_CU_offset(dlink->die,
-							  &pf->inl_offs,
-							  &__dw_error);
-				DIE_IF(ret != DW_DLV_OK);
-				pr_debug("inline definition offset %lld\n",
-					 pf->inl_offs);
-				return 0;	/* Continue to search */
-			}
-			/* Get probe address */
-			pf->addr = die_get_entrypc(dlink->die);
+	if (pp->lazy_line)
+		find_probe_point_lazy(in_die, pf);
+	else {
+		/* Get probe address */
+		pf->addr = die_get_entrypc(in_die);
+		pf->addr += pp->offset;
+		pr_debug("found inline addr: 0x%jx\n",
+			 (uintmax_t)pf->addr);
+
+		show_probe_point(in_die, pf);
+	}
+
+	return DWARF_CB_OK;
+}
+
+/* Search function from function name */
+static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct probe_finder *pf = (struct probe_finder *)data;
+	struct probe_point *pp = pf->pp;
+
+	/* Check tag and diename */
+	if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
+	    die_compare_name(sp_die, pp->function) != 0)
+		return 0;
+
+	pf->fname = dwarf_decl_file(sp_die);
+	if (pp->line) { /* Function relative line */
+		dwarf_decl_line(sp_die, &pf->lno);
+		pf->lno += pp->line;
+		find_probe_point_by_line(pf);
+	} else if (!dwarf_func_inline(sp_die)) {
+		/* Real function */
+		if (pp->lazy_line)
+			find_probe_point_lazy(sp_die, pf);
+		else {
+			pf->addr = die_get_entrypc(sp_die);
 			pf->addr += pp->offset;
 			/* TODO: Check the address in this function */
-			show_probepoint(dlink->die, pp->offset, pf);
-			return 1; /* Exit; no same symbol in this CU. */
+			show_probe_point(sp_die, pf);
 		}
-	} else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) {
-		if (die_get_abstract_origin(dlink->die) == pf->inl_offs) {
-			/* Get probe address */
-			pf->addr = die_get_entrypc(dlink->die);
-			pf->addr += pp->offset;
-			pr_debug("found inline addr: 0x%llx\n", pf->addr);
-			/* Inlined function. Get a real subprogram */
-			for (lk = dlink->parent; lk != NULL; lk = lk->parent) {
-				tag = 0;
-				dwarf_tag(lk->die, &tag, &__dw_error);
-				DIE_IF(ret == DW_DLV_ERROR);
-				if (tag == DW_TAG_subprogram &&
-				    !die_inlined_subprogram(lk->die))
-					goto found;
-			}
-			die("Failed to find real subprogram.");
-found:
-			/* Get offset from subprogram */
-			ret = die_within_subprogram(lk->die, pf->addr, &offs);
-			DIE_IF(!ret);
-			show_probepoint(lk->die, offs, pf);
-			/* Continue to search */
-		}
-	}
-	return 0;
+	} else
+		/* Inlined function: search instances */
+		dwarf_func_inline_instances(sp_die, probe_point_inline_cb, pf);
+
+	return 1; /* Exit; no same symbol in this CU. */
 }
 
 static void find_probe_point_by_func(struct probe_finder *pf)
 {
-	search_die_from_children(pf->cu_die, probefunc_callback, pf);
+	dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, pf, 0);
 }
 
 /* Find a probe point */
-int find_probepoint(int fd, struct probe_point *pp)
+int find_probe_point(int fd, struct probe_point *pp)
 {
-	Dwarf_Half addr_size = 0;
-	Dwarf_Unsigned next_cuh = 0;
-	int cu_number = 0, ret;
 	struct probe_finder pf = {.pp = pp};
+	int ret;
+	Dwarf_Off off, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	Dwarf *dbg;
 
-	ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
-	if (ret != DW_DLV_OK)
+	dbg = dwarf_begin(fd, DWARF_C_READ);
+	if (!dbg)
 		return -ENOENT;
 
 	pp->found = 0;
-	while (++cu_number) {
-		/* Search CU (Compilation Unit) */
-		ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
-			&addr_size, &next_cuh, &__dw_error);
-		DIE_IF(ret == DW_DLV_ERROR);
-		if (ret == DW_DLV_NO_ENTRY)
-			break;
-
+	off = 0;
+	line_list__init(&pf.lcache);
+	/* Loop on CUs (Compilation Unit) */
+	while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
 		/* Get the DIE(Debugging Information Entry) of this CU */
-		ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
+		diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die);
+		if (!diep)
+			continue;
 
 		/* Check if target file is included. */
 		if (pp->file)
-			pf.fno = cu_find_fileno(pf.cu_die, pp->file);
+			pf.fname = cu_find_realpath(&pf.cu_die, pp->file);
+		else
+			pf.fname = NULL;
 
-		if (!pp->file || pf.fno) {
+		if (!pp->file || pf.fname) {
 			/* Save CU base address (for frame_base) */
-			ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error);
-			DIE_IF(ret == DW_DLV_ERROR);
-			if (ret == DW_DLV_NO_ENTRY)
+			ret = dwarf_lowpc(&pf.cu_die, &pf.cu_base);
+			if (ret != 0)
 				pf.cu_base = 0;
 			if (pp->function)
 				find_probe_point_by_func(&pf);
+			else if (pp->lazy_line)
+				find_probe_point_lazy(NULL, &pf);
 			else {
 				pf.lno = pp->line;
 				find_probe_point_by_line(&pf);
 			}
 		}
-		dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
+		off = noff;
 	}
-	ret = dwarf_finish(__dw_debug, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
+	line_list__free(&pf.lcache);
+	dwarf_end(dbg);
 
 	return pp->found;
 }
 
-
-static void line_range_add_line(struct line_range *lr, unsigned int line)
-{
-	struct line_node *ln;
-	struct list_head *p;
-
-	/* Reverse search, because new line will be the last one */
-	list_for_each_entry_reverse(ln, &lr->line_list, list) {
-		if (ln->line < line) {
-			p = &ln->list;
-			goto found;
-		} else if (ln->line == line)	/* Already exist */
-			return ;
-	}
-	/* List is empty, or the smallest entry */
-	p = &lr->line_list;
-found:
-	pr_debug("Debug: add a line %u\n", line);
-	ln = zalloc(sizeof(struct line_node));
-	DIE_IF(ln == NULL);
-	ln->line = line;
-	INIT_LIST_HEAD(&ln->list);
-	list_add(&ln->list, p);
-}
-
 /* Find line range from its line number */
-static void find_line_range_by_line(struct line_finder *lf)
+static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
 {
-	Dwarf_Signed cnt, i;
-	Dwarf_Line *lines;
-	Dwarf_Unsigned lineno = 0;
-	Dwarf_Unsigned fno;
+	Dwarf_Lines *lines;
+	Dwarf_Line *line;
+	size_t nlines, i;
 	Dwarf_Addr addr;
+	int lineno;
 	int ret;
+	const char *src;
+	Dwarf_Die die_mem;
 
-	ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
+	line_list__init(&lf->lr->line_list);
+	ret = dwarf_getsrclines(&lf->cu_die, &lines, &nlines);
+	DIE_IF(ret != 0);
 
-	for (i = 0; i < cnt; i++) {
-		ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
-		if (fno != lf->fno)
-			continue;
-
-		ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
+	for (i = 0; i < nlines; i++) {
+		line = dwarf_onesrcline(lines, i);
+		ret = dwarf_lineno(line, &lineno);
+		DIE_IF(ret != 0);
 		if (lf->lno_s > lineno || lf->lno_e < lineno)
 			continue;
 
-		/* Filter line in the function address range */
-		if (lf->addr_s && lf->addr_e) {
-			ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
-			DIE_IF(ret != DW_DLV_OK);
-			if (lf->addr_s > addr || lf->addr_e <= addr)
+		if (sp_die) {
+			/* Address filtering 1: does sp_die include addr? */
+			ret = dwarf_lineaddr(line, &addr);
+			DIE_IF(ret != 0);
+			if (!dwarf_haspc(sp_die, addr))
+				continue;
+
+			/* Address filtering 2: No child include addr? */
+			if (die_get_inlinefunc(sp_die, addr, &die_mem))
 				continue;
 		}
-		line_range_add_line(lf->lr, (unsigned int)lineno);
+
+		/* TODO: Get fileno from line, but how? */
+		src = dwarf_linesrc(line, NULL, NULL);
+		if (strtailcmp(src, lf->fname) != 0)
+			continue;
+
+		/* Copy real path */
+		if (!lf->lr->path)
+			lf->lr->path = strdup(src);
+		line_list__add_line(&lf->lr->line_list, (unsigned int)lineno);
 	}
-	dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+	/* Update status */
 	if (!list_empty(&lf->lr->line_list))
 		lf->found = 1;
+	else {
+		free(lf->lr->path);
+		lf->lr->path = NULL;
+	}
+}
+
+static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
+{
+	find_line_range_by_line(in_die, (struct line_finder *)data);
+	return DWARF_CB_ABORT;	/* No need to find other instances */
 }
 
 /* Search function from function name */
-static int linefunc_callback(struct die_link *dlink, void *data)
+static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 {
 	struct line_finder *lf = (struct line_finder *)data;
 	struct line_range *lr = lf->lr;
-	Dwarf_Half tag;
-	int ret;
 
-	ret = dwarf_tag(dlink->die, &tag, &__dw_error);
-	DIE_IF(ret == DW_DLV_ERROR);
-	if (tag == DW_TAG_subprogram &&
-	    die_compare_name(dlink->die, lr->function) == 0) {
-		/* Get the address range of this function */
-		ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
-		if (ret == DW_DLV_OK)
-			ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
-		DIE_IF(ret == DW_DLV_ERROR);
-		if (ret == DW_DLV_NO_ENTRY) {
-			lf->addr_s = 0;
-			lf->addr_e = 0;
-		}
-
-		lf->fno = die_get_decl_file(dlink->die);
-		lr->offset = die_get_decl_line(dlink->die);;
+	if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
+	    die_compare_name(sp_die, lr->function) == 0) {
+		lf->fname = dwarf_decl_file(sp_die);
+		dwarf_decl_line(sp_die, &lr->offset);
+		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
 		lf->lno_s = lr->offset + lr->start;
 		if (!lr->end)
-			lf->lno_e = (Dwarf_Unsigned)-1;
+			lf->lno_e = INT_MAX;
 		else
 			lf->lno_e = lr->offset + lr->end;
 		lr->start = lf->lno_s;
 		lr->end = lf->lno_e;
-		find_line_range_by_line(lf);
-		/* If we find a target function, this should be end. */
-		lf->found = 1;
+		if (dwarf_func_inline(sp_die))
+			dwarf_func_inline_instances(sp_die,
+						    line_range_inline_cb, lf);
+		else
+			find_line_range_by_line(sp_die, lf);
 		return 1;
 	}
 	return 0;
@@ -857,55 +779,55 @@
 
 static void find_line_range_by_func(struct line_finder *lf)
 {
-	search_die_from_children(lf->cu_die, linefunc_callback, lf);
+	dwarf_getfuncs(&lf->cu_die, line_range_search_cb, lf, 0);
 }
 
 int find_line_range(int fd, struct line_range *lr)
 {
-	Dwarf_Half addr_size = 0;
-	Dwarf_Unsigned next_cuh = 0;
+	struct line_finder lf = {.lr = lr, .found = 0};
 	int ret;
-	struct line_finder lf = {.lr = lr};
+	Dwarf_Off off = 0, noff;
+	size_t cuhl;
+	Dwarf_Die *diep;
+	Dwarf *dbg;
 
-	ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
-	if (ret != DW_DLV_OK)
+	dbg = dwarf_begin(fd, DWARF_C_READ);
+	if (!dbg)
 		return -ENOENT;
 
+	/* Loop on CUs (Compilation Unit) */
 	while (!lf.found) {
-		/* Search CU (Compilation Unit) */
-		ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
-			&addr_size, &next_cuh, &__dw_error);
-		DIE_IF(ret == DW_DLV_ERROR);
-		if (ret == DW_DLV_NO_ENTRY)
+		ret = dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL);
+		if (ret != 0)
 			break;
 
 		/* Get the DIE(Debugging Information Entry) of this CU */
-		ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
-		DIE_IF(ret != DW_DLV_OK);
+		diep = dwarf_offdie(dbg, off + cuhl, &lf.cu_die);
+		if (!diep)
+			continue;
 
 		/* Check if target file is included. */
 		if (lr->file)
-			lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+			lf.fname = cu_find_realpath(&lf.cu_die, lr->file);
+		else
+			lf.fname = 0;
 
-		if (!lr->file || lf.fno) {
+		if (!lr->file || lf.fname) {
 			if (lr->function)
 				find_line_range_by_func(&lf);
 			else {
 				lf.lno_s = lr->start;
 				if (!lr->end)
-					lf.lno_e = (Dwarf_Unsigned)-1;
+					lf.lno_e = INT_MAX;
 				else
 					lf.lno_e = lr->end;
-				find_line_range_by_line(&lf);
+				find_line_range_by_line(NULL, &lf);
 			}
-			/* Get the real file path */
-			if (lf.found)
-				cu_get_filename(lf.cu_die, lf.fno, &lr->path);
 		}
-		dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+		off = noff;
 	}
-	ret = dwarf_finish(__dw_debug, &__dw_error);
-	DIE_IF(ret != DW_DLV_OK);
+	pr_debug("path: %lx\n", (unsigned long)lr->path);
+	dwarf_end(dbg);
 	return lf.found;
 }
 
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 972b386..d1a6517 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -1,6 +1,7 @@
 #ifndef _PROBE_FINDER_H
 #define _PROBE_FINDER_H
 
+#include <stdbool.h>
 #include "util.h"
 
 #define MAX_PATH_LEN		 256
@@ -20,6 +21,7 @@
 	/* Inputs */
 	char			*file;			/* File name */
 	int			line;			/* Line number */
+	char			*lazy_line;		/* Lazy line pattern */
 
 	char			*function;		/* Function name */
 	int			offset;			/* Offset bytes */
@@ -46,53 +48,46 @@
 	char			*function;		/* Function name */
 	unsigned int		start;			/* Start line number */
 	unsigned int		end;			/* End line number */
-	unsigned int		offset;			/* Start line offset */
+	int			offset;			/* Start line offset */
 	char			*path;			/* Real path name */
 	struct list_head	line_list;		/* Visible lines */
 };
 
-#ifndef NO_LIBDWARF
-extern int find_probepoint(int fd, struct probe_point *pp);
+#ifndef NO_DWARF_SUPPORT
+extern int find_probe_point(int fd, struct probe_point *pp);
 extern int find_line_range(int fd, struct line_range *lr);
 
-/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
-#ifndef _MIPS_SZLONG
-# define _MIPS_SZLONG		0
-#endif
-
 #include <dwarf.h>
-#include <libdwarf.h>
+#include <libdw.h>
 
 struct probe_finder {
-	struct probe_point	*pp;			/* Target probe point */
+	struct probe_point	*pp;		/* Target probe point */
 
 	/* For function searching */
-	Dwarf_Addr		addr;			/* Address */
-	Dwarf_Unsigned		fno;			/* File number */
-	Dwarf_Unsigned		lno;			/* Line number */
-	Dwarf_Off		inl_offs;		/* Inline offset */
-	Dwarf_Die		cu_die;			/* Current CU */
+	Dwarf_Addr		addr;		/* Address */
+	const char		*fname;		/* File name */
+	int			lno;		/* Line number */
+	Dwarf_Die		cu_die;		/* Current CU */
 
 	/* For variable searching */
-	Dwarf_Addr		cu_base;		/* Current CU base address */
-	Dwarf_Locdesc		fbloc;			/* Location of Current Frame Base */
-	const char		*var;			/* Current variable name */
-	char			*buf;			/* Current output buffer */
-	int			len;			/* Length of output buffer */
+	Dwarf_Op		*fb_ops;	/* Frame base attribute */
+	Dwarf_Addr		cu_base;	/* Current CU base address */
+	const char		*var;		/* Current variable name */
+	char			*buf;		/* Current output buffer */
+	int			len;		/* Length of output buffer */
+	struct list_head	lcache;		/* Line cache for lazy match */
 };
 
 struct line_finder {
-	struct line_range	*lr;			/* Target line range */
+	struct line_range	*lr;		/* Target line range */
 
-	Dwarf_Unsigned		fno;			/* File number */
-	Dwarf_Unsigned		lno_s;			/* Start line number */
-	Dwarf_Unsigned		lno_e;			/* End line number */
-	Dwarf_Addr		addr_s;			/* Start address */
-	Dwarf_Addr		addr_e;			/* End address */
-	Dwarf_Die		cu_die;			/* Current CU */
+	const char		*fname;		/* File name */
+	int			lno_s;		/* Start line number */
+	int			lno_e;		/* End line number */
+	Dwarf_Die		cu_die;		/* Current CU */
 	int			found;
 };
 
-#endif /* NO_LIBDWARF */
+#endif /* NO_DWARF_SUPPORT */
 
 #endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index c397d4f..a175949 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -265,21 +265,21 @@
 	return false;
 }
 
-/**
- * strglobmatch - glob expression pattern matching
- * @str: the target string to match
- * @pat: the pattern string to match
- *
- * This returns true if the @str matches @pat. @pat can includes wildcards
- * ('*','?') and character classes ([CHARS], complementation and ranges are
- * also supported). Also, this supports escape character ('\') to use special
- * characters as normal character.
- *
- * Note: if @pat syntax is broken, this always returns false.
- */
-bool strglobmatch(const char *str, const char *pat)
+/* Glob/lazy pattern matching */
+static bool __match_glob(const char *str, const char *pat, bool ignore_space)
 {
 	while (*str && *pat && *pat != '*') {
+		if (ignore_space) {
+			/* Ignore spaces for lazy matching */
+			if (isspace(*str)) {
+				str++;
+				continue;
+			}
+			if (isspace(*pat)) {
+				pat++;
+				continue;
+			}
+		}
 		if (*pat == '?') {	/* Matches any single character */
 			str++;
 			pat++;
@@ -308,3 +308,32 @@
 	return !*str && !*pat;
 }
 
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
+bool strglobmatch(const char *str, const char *pat)
+{
+	return __match_glob(str, pat, false);
+}
+
+/**
+ * strlazymatch - matching pattern strings lazily with glob pattern
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This is similar to strglobmatch, except this ignores spaces in
+ * the target string.
+ */
+bool strlazymatch(const char *str, const char *pat)
+{
+	return __match_glob(str, pat, true);
+}
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
index 02ede58..542e44d 100644
--- a/tools/perf/util/string.h
+++ b/tools/perf/util/string.h
@@ -10,6 +10,7 @@
 char **argv_split(const char *str, int *argcp);
 void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
+bool strlazymatch(const char *str, const char *pat);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index daece36..7f1178f 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -12,3 +12,6 @@
 
 config KVM_APIC_ARCHITECTURE
        bool
+
+config KVM_MMIO
+       bool
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de63..057e2cc 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0;
+	int r = 0, idx;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
 	mutex_lock(&kvm->lock);
-	down_read(&kvm->slots_lock);
+	idx = srcu_read_lock(&kvm->srcu);
 
 	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
 				      assigned_dev->assigned_dev_id);
@@ -526,7 +526,8 @@
 		r = -ENOMEM;
 		goto out;
 	}
-	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+	dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+				   assigned_dev->busnr,
 				   assigned_dev->devfn);
 	if (!dev) {
 		printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@
 	pci_reset_function(dev);
 
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_segnr = assigned_dev->segnr;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
 	match->flags = assigned_dev->flags;
@@ -573,7 +575,7 @@
 	}
 
 out:
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 out_list_del:
@@ -585,7 +587,7 @@
 	pci_dev_put(dev);
 out_free:
 	kfree(match);
-	up_read(&kvm->slots_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 	mutex_unlock(&kvm->lock);
 	return r;
 }
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 04d69cd..5169736 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -92,41 +92,64 @@
 int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
 	struct kvm_coalesced_mmio_dev *dev;
+	struct page *page;
 	int ret;
 
+	ret = -ENOMEM;
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		goto out_err;
+	kvm->coalesced_mmio_ring = page_address(page);
+
+	ret = -ENOMEM;
 	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
 	if (!dev)
-		return -ENOMEM;
+		goto out_free_page;
 	spin_lock_init(&dev->lock);
 	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
 	dev->kvm = kvm;
 	kvm->coalesced_mmio_dev = dev;
 
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0)
-		kfree(dev);
+		goto out_free_dev;
 
 	return ret;
+
+out_free_dev:
+	kfree(dev);
+out_free_page:
+	__free_page(page);
+out_err:
+	return ret;
 }
 
+void kvm_coalesced_mmio_free(struct kvm *kvm)
+{
+	if (kvm->coalesced_mmio_ring)
+		free_page((unsigned long)kvm->coalesced_mmio_ring);
+}
+
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
-				         struct kvm_coalesced_mmio_zone *zone)
+					 struct kvm_coalesced_mmio_zone *zone)
 {
 	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
 
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
-		up_write(&kvm->slots_lock);
+		mutex_unlock(&kvm->slots_lock);
 		return -ENOBUFS;
 	}
 
 	dev->zone[dev->nb_zones] = *zone;
 	dev->nb_zones++;
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return 0;
 }
 
@@ -140,10 +163,10 @@
 	if (dev == NULL)
 		return -EINVAL;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	i = dev->nb_zones;
-	while(i) {
+	while (i) {
 		z = &dev->zone[i - 1];
 
 		/* unregister all zones
@@ -158,7 +181,7 @@
 		i--;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 }
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 4b49f27..8a5959e 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,6 @@
+#ifndef __KVM_COALESCED_MMIO_H__
+#define __KVM_COALESCED_MMIO_H__
+
 /*
  * KVM coalesced MMIO
  *
@@ -7,6 +10,8 @@
  *
  */
 
+#ifdef CONFIG_KVM_MMIO
+
 #define KVM_COALESCED_MMIO_ZONE_MAX 100
 
 struct kvm_coalesced_mmio_dev {
@@ -18,7 +23,17 @@
 };
 
 int kvm_coalesced_mmio_init(struct kvm *kvm);
+void kvm_coalesced_mmio_free(struct kvm *kvm);
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
                                        struct kvm_coalesced_mmio_zone *zone);
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
                                          struct kvm_coalesced_mmio_zone *zone);
+
+#else
+
+static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
+static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
+
+#endif
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9d3fc6..7016319 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -47,7 +47,6 @@
 	int                       gsi;
 	struct list_head          list;
 	poll_table                pt;
-	wait_queue_head_t        *wqh;
 	wait_queue_t              wait;
 	struct work_struct        inject;
 	struct work_struct        shutdown;
@@ -159,8 +158,6 @@
 			poll_table *pt)
 {
 	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
-
-	irqfd->wqh = wqh;
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
@@ -463,7 +460,7 @@
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p;
 	struct eventfd_ctx       *eventfd;
 	int                       ret;
@@ -508,7 +505,7 @@
 	else
 		p->wildcard = true;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	/* Verify that there isnt a match already */
 	if (ioeventfd_check_collision(kvm, p)) {
@@ -518,18 +515,18 @@
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	return 0;
 
 unlock_fail:
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 fail:
 	kfree(p);
@@ -542,7 +539,7 @@
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	struct kvm_io_bus        *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
@@ -551,7 +548,7 @@
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 
 	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
 		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@@ -565,13 +562,13 @@
 		if (!p->wildcard && p->datamatch != args->datamatch)
 			continue;
 
-		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 		ioeventfd_release(p);
 		ret = 0;
 		break;
 	}
 
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 
 	eventfd_ctx_put(eventfd);
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 38a2d20..3db15a8 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -100,6 +100,19 @@
 	return injected;
 }
 
+static void update_handled_vectors(struct kvm_ioapic *ioapic)
+{
+	DECLARE_BITMAP(handled_vectors, 256);
+	int i;
+
+	memset(handled_vectors, 0, sizeof(handled_vectors));
+	for (i = 0; i < IOAPIC_NUM_PINS; ++i)
+		__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
+	memcpy(ioapic->handled_vectors, handled_vectors,
+	       sizeof(handled_vectors));
+	smp_wmb();
+}
+
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
@@ -134,6 +147,7 @@
 			e->bits |= (u32) val;
 			e->fields.remote_irr = 0;
 		}
+		update_handled_vectors(ioapic);
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
@@ -241,6 +255,9 @@
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
+	smp_rmb();
+	if (!test_bit(vector, ioapic->handled_vectors))
+		return;
 	mutex_lock(&ioapic->lock);
 	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
 	mutex_unlock(&ioapic->lock);
@@ -352,6 +369,7 @@
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
 	ioapic->id = 0;
+	update_handled_vectors(ioapic);
 }
 
 static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -372,13 +390,28 @@
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
-	ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
-	if (ret < 0)
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret < 0) {
+		kvm->arch.vioapic = NULL;
 		kfree(ioapic);
+	}
 
 	return ret;
 }
 
+void kvm_ioapic_destroy(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+	if (ioapic) {
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+		kvm->arch.vioapic = NULL;
+		kfree(ioapic);
+	}
+}
+
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 {
 	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
@@ -399,6 +432,7 @@
 
 	mutex_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	update_handled_vectors(ioapic);
 	mutex_unlock(&ioapic->lock);
 	return 0;
 }
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 419c43b..8a751b7 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -46,6 +46,7 @@
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
 	struct mutex lock;
+	DECLARE_BITMAP(handled_vectors, 256);
 };
 
 #ifdef DEBUG
@@ -71,6 +72,7 @@
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
 int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 1514758..80fd3ad 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,10 +32,10 @@
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-int kvm_iommu_map_pages(struct kvm *kvm,
-			gfn_t base_gfn, unsigned long npages)
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
-	gfn_t gfn = base_gfn;
+	gfn_t gfn = slot->base_gfn;
+	unsigned long npages = slot->npages;
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
@@ -54,7 +54,7 @@
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
-		pfn = gfn_to_pfn(kvm, gfn);
+		pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
@@ -69,17 +69,19 @@
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, base_gfn, i);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, i);
 	return r;
 }
 
 static int kvm_iommu_map_memslots(struct kvm *kvm)
 {
 	int i, r = 0;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
-					kvm->memslots[i].npages);
+	slots = rcu_dereference(kvm->memslots);
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
 		if (r)
 			break;
 	}
@@ -104,7 +106,8 @@
 
 	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
-		printk(KERN_ERR "assign device %x:%x.%x failed",
+		printk(KERN_ERR "assign device %x:%x:%x.%x failed",
+			pci_domain_nr(pdev->bus),
 			pdev->bus->number,
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn));
@@ -125,7 +128,8 @@
 			goto out_unmap;
 	}
 
-	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
@@ -152,7 +156,8 @@
 
 	iommu_detach_device(domain, &pdev->dev);
 
-	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
+		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
@@ -210,10 +215,13 @@
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
-				    kvm->memslots[i].npages);
+	slots = rcu_dereference(kvm->memslots);
+
+	for (i = 0; i < slots->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
+				    slots->memslots[i].npages);
 	}
 
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be3..548f925 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,8 @@
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/srcu.h>
+#include <linux/hugetlb.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -51,9 +53,7 @@
 #include <asm/pgtable.h>
 #include <asm-generic/bitops/le.h>
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 #include "coalesced_mmio.h"
-#endif
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -86,6 +86,8 @@
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+
 static bool kvm_rebooting;
 
 static bool largepages_enabled = true;
@@ -136,7 +138,7 @@
 
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
-	spin_lock(&kvm->requests_lock);
+	raw_spin_lock(&kvm->requests_lock);
 	me = smp_processor_id();
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@
 		smp_call_function_many(cpus, ack_flush, NULL, 1);
 	else
 		called = false;
-	spin_unlock(&kvm->requests_lock);
+	raw_spin_unlock(&kvm->requests_lock);
 	free_cpumask_var(cpus);
 	return called;
 }
@@ -215,7 +217,7 @@
 					     unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush;
+	int need_tlb_flush, idx;
 
 	/*
 	 * When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@
 	 * pte after kvm_unmap_hva returned, without noticing the page
 	 * is going to be freed.
 	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	need_tlb_flush = kvm_unmap_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -252,11 +256,14 @@
 					pte_t pte)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	int idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	kvm->mmu_notifier_seq++;
 	kvm_set_spte_hva(kvm, address, pte);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 }
 
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@
 						    unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int need_tlb_flush = 0;
+	int need_tlb_flush = 0, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	/*
 	 * The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@
 	for (; start < end; start += PAGE_SIZE)
 		need_tlb_flush |= kvm_unmap_hva(kvm, start);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -314,11 +323,13 @@
 					      unsigned long address)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	int young;
+	int young, idx;
 
+	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 	young = kvm_age_hva(kvm, address);
 	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
@@ -341,15 +352,26 @@
 	.change_pte		= kvm_mmu_notifier_change_pte,
 	.release		= kvm_mmu_notifier_release,
 };
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
+	return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
+}
+
+#else  /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
+
+static int kvm_init_mmu_notifier(struct kvm *kvm)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
 static struct kvm *kvm_create_vm(void)
 {
-	int r = 0;
+	int r = 0, i;
 	struct kvm *kvm = kvm_arch_create_vm();
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	struct page *page;
-#endif
 
 	if (IS_ERR(kvm))
 		goto out;
@@ -363,39 +385,35 @@
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!page) {
-		r = -ENOMEM;
+	r = -ENOMEM;
+	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!kvm->memslots)
 		goto out_err;
-	}
-	kvm->coalesced_mmio_ring =
-			(struct kvm_coalesced_mmio_ring *)page_address(page);
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
-	{
-		kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
-		r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
-		if (r) {
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-			put_page(page);
-#endif
+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err;
+	for (i = 0; i < KVM_NR_BUSES; i++) {
+		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+					GFP_KERNEL);
+		if (!kvm->buses[i]) {
+			cleanup_srcu_struct(&kvm->srcu);
 			goto out_err;
 		}
 	}
-#endif
+
+	r = kvm_init_mmu_notifier(kvm);
+	if (r) {
+		cleanup_srcu_struct(&kvm->srcu);
+		goto out_err;
+	}
 
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
-	spin_lock_init(&kvm->requests_lock);
-	kvm_io_bus_init(&kvm->pio_bus);
+	raw_spin_lock_init(&kvm->requests_lock);
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
-	kvm_io_bus_init(&kvm->mmio_bus);
-	init_rwsem(&kvm->slots_lock);
+	mutex_init(&kvm->slots_lock);
 	atomic_set(&kvm->users_count, 1);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
@@ -406,12 +424,12 @@
 out:
 	return kvm;
 
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
-    (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 out_err:
 	hardware_disable_all();
-#endif
 out_err_nodisable:
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kfree(kvm->buses[i]);
+	kfree(kvm->memslots);
 	kfree(kvm);
 	return ERR_PTR(r);
 }
@@ -446,13 +464,17 @@
 void kvm_free_physmem(struct kvm *kvm)
 {
 	int i;
+	struct kvm_memslots *slots = kvm->memslots;
 
-	for (i = 0; i < kvm->nmemslots; ++i)
-		kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+	for (i = 0; i < slots->nmemslots; ++i)
+		kvm_free_physmem_slot(&slots->memslots[i], NULL);
+
+	kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+	int i;
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
@@ -460,12 +482,9 @@
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	kvm_io_bus_destroy(&kvm->pio_bus);
-	kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-	if (kvm->coalesced_mmio_ring != NULL)
-		free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+	for (i = 0; i < KVM_NR_BUSES; i++)
+		kvm_io_bus_destroy(kvm->buses[i]);
+	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -512,12 +531,13 @@
 			    struct kvm_userspace_memory_region *mem,
 			    int user_alloc)
 {
-	int r;
+	int r, flush_shadow = 0;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long i;
 	struct kvm_memory_slot *memslot;
 	struct kvm_memory_slot old, new;
+	struct kvm_memslots *slots, *old_memslots;
 
 	r = -EINVAL;
 	/* General sanity checks */
@@ -532,7 +552,7 @@
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	memslot = &kvm->memslots[mem->slot];
+	memslot = &kvm->memslots->memslots[mem->slot];
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -553,7 +573,7 @@
 	/* Check for overlaps */
 	r = -EEXIST;
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *s = &kvm->memslots[i];
+		struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
 		if (s == memslot || !s->npages)
 			continue;
@@ -579,15 +599,7 @@
 		memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
 		new.user_alloc = user_alloc;
-		/*
-		 * hva_to_rmmap() serialzies with the mmu_lock and to be
-		 * safe it has to ignore memslots with !user_alloc &&
-		 * !userspace_addr.
-		 */
-		if (user_alloc)
-			new.userspace_addr = mem->userspace_addr;
-		else
-			new.userspace_addr = 0;
+		new.userspace_addr = mem->userspace_addr;
 	}
 	if (!npages)
 		goto skip_lpage;
@@ -642,8 +654,9 @@
 		if (!new.dirty_bitmap)
 			goto out_free;
 		memset(new.dirty_bitmap, 0, dirty_bytes);
+		/* destroy any largepage mappings for dirty tracking */
 		if (old.npages)
-			kvm_arch_flush_shadow(kvm);
+			flush_shadow = 1;
 	}
 #else  /* not defined CONFIG_S390 */
 	new.user_alloc = user_alloc;
@@ -651,36 +664,72 @@
 		new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-	if (!npages)
+	if (!npages) {
+		r = -ENOMEM;
+		slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+		if (!slots)
+			goto out_free;
+		memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+		if (mem->slot >= slots->nmemslots)
+			slots->nmemslots = mem->slot + 1;
+		slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+		old_memslots = kvm->memslots;
+		rcu_assign_pointer(kvm->memslots, slots);
+		synchronize_srcu_expedited(&kvm->srcu);
+		/* From this point no new shadow pages pointing to a deleted
+		 * memslot will be created.
+		 *
+		 * validation of sp->gfn happens in:
+		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+		 * 	- kvm_is_visible_gfn (mmu_check_roots)
+		 */
 		kvm_arch_flush_shadow(kvm);
-
-	spin_lock(&kvm->mmu_lock);
-	if (mem->slot >= kvm->nmemslots)
-		kvm->nmemslots = mem->slot + 1;
-
-	*memslot = new;
-	spin_unlock(&kvm->mmu_lock);
-
-	r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-	if (r) {
-		spin_lock(&kvm->mmu_lock);
-		*memslot = old;
-		spin_unlock(&kvm->mmu_lock);
-		goto out_free;
+		kfree(old_memslots);
 	}
 
-	kvm_free_physmem_slot(&old, npages ? &new : NULL);
-	/* Slot deletion case: we have to update the current slot */
-	spin_lock(&kvm->mmu_lock);
-	if (!npages)
-		*memslot = old;
-	spin_unlock(&kvm->mmu_lock);
+	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+	if (r)
+		goto out_free;
+
 #ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
-	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-	if (r)
-		goto out;
+	if (npages) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		if (r)
+			goto out_free;
+	}
 #endif
+
+	r = -ENOMEM;
+	slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+	memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+	if (mem->slot >= slots->nmemslots)
+		slots->nmemslots = mem->slot + 1;
+
+	/* actual memory is freed via old in kvm_free_physmem_slot below */
+	if (!npages) {
+		new.rmap = NULL;
+		new.dirty_bitmap = NULL;
+		for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+			new.lpage_info[i] = NULL;
+	}
+
+	slots->memslots[mem->slot] = new;
+	old_memslots = kvm->memslots;
+	rcu_assign_pointer(kvm->memslots, slots);
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+	kvm_free_physmem_slot(&old, &new);
+	kfree(old_memslots);
+
+	if (flush_shadow)
+		kvm_arch_flush_shadow(kvm);
+
 	return 0;
 
 out_free:
@@ -697,9 +746,9 @@
 {
 	int r;
 
-	down_write(&kvm->slots_lock);
+	mutex_lock(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&kvm->slots_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -726,7 +775,7 @@
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -780,9 +829,10 @@
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	for (i = 0; i < kvm->nmemslots; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+	for (i = 0; i < slots->nmemslots; ++i) {
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +851,14 @@
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
 	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+		if (memslot->flags & KVM_MEMSLOT_INVALID)
+			continue;
 
 		if (gfn >= memslot->base_gfn
 		    && gfn < memslot->base_gfn + memslot->npages)
@@ -814,33 +868,68 @@
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr, size;
+
+	size = PAGE_SIZE;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return PAGE_SIZE;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		goto out;
+
+	size = vma_kernel_pagesize(vma);
+
+out:
+	up_read(&current->mm->mmap_sem);
+
+	return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+	int i;
+	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+	struct kvm_memory_slot *memslot = NULL;
+
+	gfn = unalias_gfn(kvm, gfn);
+	for (i = 0; i < slots->nmemslots; ++i) {
+		memslot = &slots->memslots[i];
+
+		if (gfn >= memslot->base_gfn
+		    && gfn < memslot->base_gfn + memslot->npages)
+			break;
+	}
+
+	return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
 
-	gfn = unalias_gfn(kvm, gfn);
+	gfn = unalias_gfn_instantiation(kvm, gfn);
 	slot = gfn_to_memslot_unaliased(kvm, gfn);
-	if (!slot)
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return bad_hva();
 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
 	struct page *page[1];
-	unsigned long addr;
 	int npages;
 	pfn_t pfn;
 
 	might_sleep();
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr)) {
-		get_page(bad_page);
-		return page_to_pfn(bad_page);
-	}
-
 	npages = get_user_pages_fast(addr, 1, 1, page);
 
 	if (unlikely(npages != 1)) {
@@ -865,8 +954,32 @@
 	return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr)) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+			 struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+	return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
 	pfn_t pfn;
@@ -1854,12 +1967,7 @@
 	.priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-	memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
 	int i;
 
@@ -1868,13 +1976,15 @@
 
 		kvm_iodevice_destructor(pos);
 	}
+	kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
 			return 0;
@@ -1882,59 +1992,71 @@
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+		    int len, void *val)
 {
 	int i;
+	struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
 	for (i = 0; i < bus->dev_count; i++)
 		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
 			return 0;
 	return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-			     struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			    struct kvm_io_device *dev)
 {
-	int ret;
+	struct kvm_io_bus *new_bus, *bus;
 
-	down_write(&kvm->slots_lock);
-	ret = __kvm_io_bus_register_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-
-	return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-			      struct kvm_io_device *dev)
-{
+	bus = kvm->buses[bus_idx];
 	if (bus->dev_count > NR_IOBUS_DEVS-1)
 		return -ENOSPC;
 
-	bus->devs[bus->dev_count++] = dev;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+	new_bus->devs[new_bus->dev_count++] = dev;
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
 
 	return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-			       struct kvm_io_bus *bus,
-			       struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+			      struct kvm_io_device *dev)
 {
-	down_write(&kvm->slots_lock);
-	__kvm_io_bus_unregister_dev(bus, dev);
-	up_write(&kvm->slots_lock);
-}
+	int i, r;
+	struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-				 struct kvm_io_device *dev)
-{
-	int i;
+	new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+	if (!new_bus)
+		return -ENOMEM;
 
-	for (i = 0; i < bus->dev_count; i++)
-		if (bus->devs[i] == dev) {
-			bus->devs[i] = bus->devs[--bus->dev_count];
+	bus = kvm->buses[bus_idx];
+	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+	r = -ENOENT;
+	for (i = 0; i < new_bus->dev_count; i++)
+		if (new_bus->devs[i] == dev) {
+			r = 0;
+			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
 			break;
 		}
+
+	if (r) {
+		kfree(new_bus);
+		return r;
+	}
+
+	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+	synchronize_srcu_expedited(&kvm->srcu);
+	kfree(bus);
+	return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {